diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index f757e244e38ec9..3cea7a66d01051 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -116,8 +116,6 @@ void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) {
       return platform::to_void_cast(tensor.data<unsigned char>());
     case mkldnn::memory::data_type::s32:
       return platform::to_void_cast(tensor.data<int32_t>());
-    case mkldnn::memory::data_type::bf16:
-      return platform::to_void_cast(tensor.data<paddle::platform::bfloat16>());
     default:
       PADDLE_THROW(
           platform::errors::InvalidArgument("Wrong mkldnn type provided."));
diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h
index b92c47c2eb0186..6eb84ef9d7c01b 100644
--- a/paddle/fluid/framework/data_layout_transform.h
+++ b/paddle/fluid/framework/data_layout_transform.h
@@ -61,8 +61,7 @@ inline MKLDNNDataType ToMKLDNNDataType(proto::VarType::Type type) {
       {DataTypeTrait<float>::DataType(), MKLDNNDataType::f32},
       {DataTypeTrait<int8_t>::DataType(), MKLDNNDataType::s8},
       {DataTypeTrait<uint8_t>::DataType(), MKLDNNDataType::u8},
-      {DataTypeTrait<int32_t>::DataType(), MKLDNNDataType::s32},
-      {DataTypeTrait<platform::bfloat16>::DataType(), MKLDNNDataType::bf16}};
+      {DataTypeTrait<int32_t>::DataType(), MKLDNNDataType::s32}};
   auto iter = dict.find(static_cast<int>(type));
   if (iter != dict.end()) return iter->second;
   return MKLDNNDataType::undef;
@@ -75,9 +74,6 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
 void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
                                const OpKernelType& expected_kernel_type,
                                const Tensor& in, Tensor* out);
-
-void* GetDataFromTensor(const Tensor& tensor, MKLDNNDataType type);
-
 #endif

 std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);
diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc
index 8dfad23db65178..a0d08826b854fe 100644
--- a/paddle/fluid/framework/data_layout_transform_test.cc
+++ b/paddle/fluid/framework/data_layout_transform_test.cc
@@ -43,17 +43,3 @@ TEST(DataTransform, DataLayoutFunction) {
   EXPECT_TRUE(in.layout() == paddle::framework::DataLayout::kNHWC);
   EXPECT_TRUE(in.dims() == paddle::framework::make_ddim({2, 3, 1, 2}));
 }
-
-#ifdef PADDLE_WITH_MKLDNN
-TEST(DataTransform, GetDataFromTensorDNNL) {
-  auto place = paddle::platform::CPUPlace();
-  paddle::framework::Tensor in = paddle::framework::Tensor();
-  in.mutable_data<paddle::platform::bfloat16>(
-      paddle::framework::make_ddim({2, 3, 1, 2}), place);
-
-  void* in_data =
-      paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::bf16);
-  EXPECT_EQ(in_data, paddle::platform::to_void_cast(
-                         in.data<paddle::platform::bfloat16>()));
-}
-#endif
diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc
index 8188d5cde1b904..f479d92483c1c3 100644
--- a/paddle/fluid/framework/data_type.cc
+++ b/paddle/fluid/framework/data_type.cc
@@ -18,7 +18,6 @@
 #include <string>

 using float16 = paddle::platform::float16;
-using bfloat16 = paddle::platform::bfloat16;

 namespace paddle {
 namespace framework {
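The hunks above all revolve around one pattern: a runtime dtype enum dispatched to a compile-time template parameter. Below is a minimal standalone sketch of that pattern (invented names, not the Paddle API): `data<T>()` is a template, so a runtime enum value cannot select `T` by itself, and every supported dtype needs its own case. That is why dropping a dtype, as this revert does for bf16, means deleting a case from the switch.

```cpp
// Standalone sketch with invented names -- not the Paddle API.
#include <cstdint>
#include <stdexcept>

enum class DType { f32, s8, u8, s32 };

struct RawTensor {
  void* buf = nullptr;
  template <typename T>
  T* data() const { return static_cast<T*>(buf); }
};

void* DataOf(const RawTensor& t, DType type) {
  switch (type) {
    // Each enum value must spell out the concrete element type.
    case DType::f32: return t.data<float>();
    case DType::s8:  return t.data<int8_t>();
    case DType::u8:  return t.data<uint8_t>();
    case DType::s32: return t.data<int32_t>();
  }
  throw std::invalid_argument("wrong type provided");
}
```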
diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h
index 720e422e114835..2c4a7b4d027274 100644
--- a/paddle/fluid/framework/data_type.h
+++ b/paddle/fluid/framework/data_type.h
@@ -17,8 +17,6 @@ limitations under the License. */
 #include <string>
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/platform/enforce.h"
-
-#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"

 namespace paddle {
@@ -38,16 +36,15 @@ struct DataTypeTrait {
 #define _ForEachDataTypeHelper_(callback, cpp_type, proto_type) \
   callback(cpp_type, ::paddle::framework::proto::VarType::proto_type);

-#define _ForEachDataType_(callback)                                      \
-  _ForEachDataTypeHelper_(callback, float, FP32);                        \
-  _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16);  \
-  _ForEachDataTypeHelper_(callback, ::paddle::platform::bfloat16, BF16); \
-  _ForEachDataTypeHelper_(callback, double, FP64);                       \
-  _ForEachDataTypeHelper_(callback, int, INT32);                         \
-  _ForEachDataTypeHelper_(callback, int64_t, INT64);                     \
-  _ForEachDataTypeHelper_(callback, bool, BOOL);                         \
-  _ForEachDataTypeHelper_(callback, uint8_t, UINT8);                     \
-  _ForEachDataTypeHelper_(callback, int16_t, INT16);                     \
+#define _ForEachDataType_(callback)                                     \
+  _ForEachDataTypeHelper_(callback, float, FP32);                       \
+  _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16); \
+  _ForEachDataTypeHelper_(callback, double, FP64);                      \
+  _ForEachDataTypeHelper_(callback, int, INT32);                        \
+  _ForEachDataTypeHelper_(callback, int64_t, INT64);                    \
+  _ForEachDataTypeHelper_(callback, bool, BOOL);                        \
+  _ForEachDataTypeHelper_(callback, uint8_t, UINT8);                    \
+  _ForEachDataTypeHelper_(callback, int16_t, INT16);                    \
   _ForEachDataTypeHelper_(callback, int8_t, INT8)

 #define _ForEachDataTypeSmall_(callback) \
diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc
index 331596da33acc1..2a380201f297f4 100644
--- a/paddle/fluid/framework/data_type_test.cc
+++ b/paddle/fluid/framework/data_type_test.cc
@@ -38,25 +38,3 @@ TEST(DataType, float16) {
   std::string type = "::paddle::platform::float16";
   EXPECT_STREQ(f::DataTypeToString(dtype).c_str(), type.c_str());
 }
-
-TEST(DataType, bfloat16) {
-  using paddle::framework::Tensor;
-  using paddle::platform::CPUPlace;
-  using paddle::platform::bfloat16;
-  namespace f = paddle::framework;
-  f::proto::VarType::Type dtype = f::proto::VarType::BF16;
-
-  Tensor tensor;
-  CPUPlace cpu;
-  tensor.mutable_data(cpu, dtype);
-
-  // test bf16 tensor
-  EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16)));
-
-  // test bf16 size
-  EXPECT_EQ(f::SizeOfType(dtype), 2u);
-
-  // test debug info
-  std::string type = "::paddle::platform::bfloat16";
-  EXPECT_STREQ(f::DataTypeToString(dtype).c_str(), type.c_str());
-}
diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc
index 3d56152c237695..44542f05d9d5c9 100644
--- a/paddle/fluid/framework/data_type_transform.cc
+++ b/paddle/fluid/framework/data_type_transform.cc
@@ -77,10 +77,6 @@ void TransDataType(const OpKernelType& kernel_type_for_var,
       framework::VisitDataType(dst_type,
                                CastDataType<platform::float16>(in, out, ctx));
       break;
-    case proto::VarType::BF16:
-      framework::VisitDataType(dst_type,
-                               CastDataType<platform::bfloat16>(in, out, ctx));
-      break;
     case proto::VarType::FP32:
       framework::VisitDataType(dst_type, CastDataType<float>(in, out, ctx));
       break;
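The `_ForEachDataType_` macro edited above is a classic X-macro: the list of supported (type, proto tag) pairs lives in exactly one place, and each call site supplies a callback, so adding or removing a dtype is a one-line change. A self-contained sketch of the pattern, with names invented for illustration:

```cpp
// Minimal standalone illustration of the X-macro idiom behind
// _ForEachDataType_ -- names here are invented for the example.
#include <cstdint>
#include <cstdio>

#define FOR_EACH_TYPE(callback) \
  callback(float, FP32);        \
  callback(double, FP64);       \
  callback(int32_t, INT32);     \
  callback(int64_t, INT64)

#define PRINT_SIZE(cpp_type, tag) \
  std::printf(#tag " -> %zu bytes\n", sizeof(cpp_type))

int main() {
  FOR_EACH_TYPE(PRINT_SIZE);  // expands to one printf per listed type
  return 0;
}
```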
diff --git a/paddle/fluid/framework/data_type_transform_test.cc b/paddle/fluid/framework/data_type_transform_test.cc
index ea7a665bcbe02f..bbebea9f13fd37 100644
--- a/paddle/fluid/framework/data_type_transform_test.cc
+++ b/paddle/fluid/framework/data_type_transform_test.cc
@@ -24,11 +24,6 @@ TEST(DataTypeTransform, CPUTransform) {
       paddle::framework::DataLayout::kAnyLayout,
       paddle::framework::LibraryType::kPlain);

-  auto kernel_bf16 = paddle::framework::OpKernelType(
-      paddle::framework::proto::VarType::BF16, place,
-      paddle::framework::DataLayout::kAnyLayout,
-      paddle::framework::LibraryType::kPlain);
-
   auto kernel_fp32 = paddle::framework::OpKernelType(
       paddle::framework::proto::VarType::FP32, place,
       paddle::framework::DataLayout::kAnyLayout,
       paddle::framework::LibraryType::kPlain);
@@ -194,120 +189,4 @@ TEST(DataTypeTransform, CPUTransform) {
                 static_cast<paddle::platform::float16>(in_data_bool[i]).x);
     }
   }
-
-  // data type transform from/to bfloat16
-  {
-    paddle::framework::Tensor in;
-    paddle::framework::Tensor out;
-
-    paddle::platform::bfloat16* ptr =
-        in.mutable_data<paddle::platform::bfloat16>(
-            paddle::framework::make_ddim({2, 3}), place);
-    int data_number = 2 * 3;
-
-    for (int i = 0; i < data_number; ++i) {
-      ptr[i] = i;
-    }
-
-    // transform from bfloat16 to other data types
-    paddle::framework::TransDataType(kernel_bf16, kernel_fp32, in, &out);
-    float* out_data_float = out.data<float>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(out_data_float[i], static_cast<float>(ptr[i]));
-    }
-
-    paddle::framework::TransDataType(kernel_bf16, kernel_fp64, in, &out);
-    double* out_data_double = out.data<double>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(out_data_double[i], static_cast<double>(ptr[i]));
-    }
-
-    paddle::framework::TransDataType(kernel_bf16, kernel_int32, in, &out);
-    int* out_data_int = out.data<int>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(out_data_int[i], static_cast<int>(ptr[i]));
-    }
-
-    paddle::framework::TransDataType(kernel_bf16, kernel_int64, in, &out);
-    int64_t* out_data_int64 = out.data<int64_t>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(out_data_int64[i], static_cast<int64_t>(ptr[i]));
-    }
-
-    paddle::framework::TransDataType(kernel_bf16, kernel_bool, in, &out);
-    bool* out_data_bool = out.data<bool>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(out_data_bool[i], static_cast<bool>(ptr[i]));
-    }
-
-    // transform float to bfloat16
-    float* in_data_float =
-        in.mutable_data<float>(paddle::framework::make_ddim({2, 3}), place);
-    for (int i = 0; i < data_number; ++i) {
-      in_data_float[i] = i;
-    }
-
-    paddle::framework::TransDataType(kernel_fp32, kernel_bf16, in, &out);
-    ptr = out.data<paddle::platform::bfloat16>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(ptr[i].x,
-                static_cast<paddle::platform::bfloat16>(in_data_float[i]).x);
-    }
-
-    // transform double to bfloat16
-    double* in_data_double =
-        in.mutable_data<double>(paddle::framework::make_ddim({2, 3}), place);
-    for (int i = 0; i < data_number; ++i) {
-      in_data_double[i] = i;
-    }
-
-    paddle::framework::TransDataType(kernel_fp64, kernel_bf16, in, &out);
-    ptr = out.data<paddle::platform::bfloat16>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(ptr[i].x,
-                static_cast<paddle::platform::bfloat16>(in_data_double[i]).x);
-    }
-
-    // transform int to bfloat16
-    int* in_data_int =
-        in.mutable_data<int>(paddle::framework::make_ddim({2, 3}), place);
-    for (int i = 0; i < data_number; ++i) {
-      in_data_int[i] = i;
-    }
-
-    paddle::framework::TransDataType(kernel_int32, kernel_bf16, in, &out);
-    ptr = out.data<paddle::platform::bfloat16>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(ptr[i].x,
-                static_cast<paddle::platform::bfloat16>(in_data_int[i]).x);
-    }
-
-    // transform int64 to bfloat16
-    int64_t* in_data_int64 =
-        in.mutable_data<int64_t>(paddle::framework::make_ddim({2, 3}), place);
-    for (int i = 0; i < data_number; ++i) {
-      in_data_int64[i] = i;
-    }
-
-    paddle::framework::TransDataType(kernel_int64, kernel_bf16, in, &out);
-    ptr = out.data<paddle::platform::bfloat16>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(ptr[i].x,
-                static_cast<paddle::platform::bfloat16>(in_data_int64[i]).x);
-    }
-
-    // transform bool to bfloat16
-    bool* in_data_bool =
-        in.mutable_data<bool>(paddle::framework::make_ddim({2, 3}), place);
-    for (int i = 0; i < data_number; ++i) {
-      in_data_bool[i] = i;
-    }
-
-    paddle::framework::TransDataType(kernel_bool, kernel_bf16, in, &out);
-    ptr = out.data<paddle::platform::bfloat16>();
-    for (int i = 0; i < data_number; ++i) {
-      EXPECT_EQ(ptr[i].x,
-                static_cast<paddle::platform::bfloat16>(in_data_bool[i]).x);
-    }
-  }
 }
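The `.x` comparisons in the deleted test rely on bfloat16's defining property: conversion from float simply keeps the upper 16 bits of the IEEE-754 single-precision bit pattern. A standalone sketch of that round trip (assumes a little-endian host, as the deleted implementation did); the expected bit patterns match the deleted unit tests:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// float -> bfloat16 by truncation: keep the high 16 bits of the
// single-precision pattern (8 exponent bits survive, mantissa shrinks
// from 23 to 7 bits).
inline uint16_t FloatToBf16Bits(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);
}

// bfloat16 -> float: widen by appending 16 zero mantissa bits.
inline float Bf16BitsToFloat(uint16_t h) {
  uint32_t bits = static_cast<uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  assert(FloatToBf16Bits(1.0f) == 0x3f80);
  assert(Bf16BitsToFloat(0x3f00) == 0.5f);
  return 0;
}
```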
diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc
index 67a4c0caf8e19d..956b099e883f9e 100644
--- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc
+++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc
@@ -167,8 +167,6 @@ static void PrintNanInf(const T* value, const size_t numel, int print_num,
 // more detail see: 180 page of
 // https://www.openmp.org/wp-content/uploads/OpenMP4.0.0.pdf
 #pragma omp declare reduction(+ : paddle::platform::float16 : omp_out += omp_in)
-#pragma omp declare reduction(+ : paddle::platform::bfloat16 : omp_out += \
-                                  omp_in)
 #endif

 template <typename T>
diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc
index 915589b3242b7d..180b33d0cb72e2 100644
--- a/paddle/fluid/framework/dlpack_tensor.cc
+++ b/paddle/fluid/framework/dlpack_tensor.cc
@@ -23,7 +23,6 @@ template <typename T>
 static ::DLDataType GetDLDataTypeCode() {
   ::DLDataType dtype;
   if (std::is_same<T, platform::float16>::value ||
-      std::is_same<T, platform::bfloat16>::value ||
       std::is_floating_point<T>::value) {
     dtype.code = kDLFloat;
   } else if (std::is_unsigned<T>::value) {
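The removed pragma is an OpenMP user-defined reduction, required because OpenMP only knows how to reduce built-in arithmetic types. A minimal sketch of the same mechanism for an invented wrapper type (compile with -fopenmp; private copies are default-constructed when no initializer clause is given):

```cpp
#include <cstdio>

struct Wrapped {
  float v;
  Wrapped() : v(0.f) {}
  explicit Wrapped(float x) : v(x) {}
  Wrapped& operator+=(const Wrapped& o) {
    v += o.v;
    return *this;
  }
};

// Teach OpenMP how to combine two Wrapped values under "+".
#pragma omp declare reduction(+ : Wrapped : omp_out += omp_in)

int main() {
  Wrapped sum;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i) {
    sum += Wrapped(1.0f);
  }
  std::printf("%f\n", sum.v);  // prints 1000.000000
  return 0;
}
```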
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
index 9eb8478515727c..6fbf880356c541 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -90,6 +90,32 @@ void MemoryOptimizePass::CollectLifeCycle(
   }
 }

+// TODO(Superjomn) Make this a general helper method.
+int DataTypeToSpace(framework::proto::VarType_Type type) {
+  switch (type) {
+    case framework::proto::VarType_Type_BOOL:
+      return sizeof(bool);
+    case framework::proto::VarType_Type_FP32:
+      return sizeof(float);
+    case framework::proto::VarType_Type_INT32:
+      return sizeof(int32_t);
+    case framework::proto::VarType_Type_INT64:
+      return sizeof(int64_t);
+    case framework::proto::VarType_Type_INT16:
+      return sizeof(int16_t);
+    case framework::proto::VarType_Type_FP16:
+      return sizeof(int16_t);
+    case framework::proto::VarType_Type_FP64:
+      return sizeof(double);
+    case framework::proto::VarType_Type_UINT8:
+      return sizeof(unsigned char);
+    case framework::proto::VarType_Type_INT8:
+      return sizeof(int8_t);
+    default:
+      PADDLE_THROW("Unknown data type");
+  }
+}
+
 void MemoryOptimizePass::CollectVarMemorySize(
     space_table_t* space_table) const {
   const int fake_batch_size = 1;
@@ -137,7 +163,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
     int size = std::accumulate(shape.begin(), shape.end(), 1,
                                std::multiplies<int>());
     (*space_table)[node->Var()->Name()] =
-        size * paddle::framework::SizeOfType(node->Var()->GetDataType());
+        size * DataTypeToSpace(node->Var()->GetDataType());
   }
 }
diff --git a/paddle/fluid/inference/lite/test_engine.cc b/paddle/fluid/inference/lite/test_engine.cc
index d29bcb76be78f1..325c7ab2539f28 100644
--- a/paddle/fluid/inference/lite/test_engine.cc
+++ b/paddle/fluid/inference/lite/test_engine.cc
@@ -14,16 +14,15 @@

 #include <gtest/gtest.h>

+#include "paddle/fluid/inference/lite/engine.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/lite/ut_helper.h"

 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"

-#include "paddle/fluid/inference/lite/engine.h"
-#include "paddle/fluid/operators/lite/ut_helper.h"
-
 namespace paddle {
 namespace inference {
 namespace lite {
diff --git a/paddle/fluid/operators/math/concat_and_split.h b/paddle/fluid/operators/math/concat_and_split.h
index 18d9a6310dd6c0..3a5eddcbf4af69 100644
--- a/paddle/fluid/operators/math/concat_and_split.h
+++ b/paddle/fluid/operators/math/concat_and_split.h
@@ -65,14 +65,13 @@ class SplitFunctor {
 }  // namespace operators
 }  // namespace paddle

-#define FOR_ALL_TYPES(macro)          \
-  macro(int);                         \
-  macro(float);                       \
-  macro(double);                      \
-  macro(bool);                        \
-  macro(int64_t);                     \
-  macro(int16_t);                     \
-  macro(uint8_t);                     \
-  macro(int8_t);                      \
-  macro(::paddle::platform::float16); \
-  macro(::paddle::platform::bfloat16)
+#define FOR_ALL_TYPES(macro) \
+  macro(int);                \
+  macro(float);              \
+  macro(double);             \
+  macro(bool);               \
+  macro(int64_t);            \
+  macro(int16_t);            \
+  macro(uint8_t);            \
+  macro(int8_t);             \
+  macro(::paddle::platform::float16)
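`DataTypeToSpace` added above feeds the estimate in `CollectVarMemorySize`: bytes = product of the shape (with the unknown batch dimension clamped to a fake batch size of 1) times the element size. A standalone sketch of that computation, with invented names:

```cpp
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Estimate the byte footprint of a tensor variable; elem_size would come
// from a dtype-to-size mapping like DataTypeToSpace above.
int64_t NumBytes(std::vector<int64_t> shape, int elem_size) {
  for (auto& d : shape) {
    if (d < 0) d = 1;  // unknown (batch) dims clamped to a fake size of 1
  }
  int64_t numel =
      std::accumulate(shape.begin(), shape.end(), static_cast<int64_t>(1),
                      std::multiplies<int64_t>());
  return numel * elem_size;
}

// Usage: NumBytes({-1, 3, 224, 224}, sizeof(float)) == 602112
```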
diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc
index 824e66b1eb4ae0..6748d0ab43f70f 100644
--- a/paddle/fluid/operators/math/math_function.cc
+++ b/paddle/fluid/operators/math/math_function.cc
@@ -34,7 +34,6 @@ namespace math {
 using float16 = paddle::platform::float16;

 template struct SetConstant<platform::CPUDeviceContext, platform::float16>;
-template struct SetConstant<platform::CPUDeviceContext, platform::bfloat16>;
 template struct SetConstant<platform::CPUDeviceContext, float>;
 template struct SetConstant<platform::CPUDeviceContext, double>;
 template struct SetConstant<platform::CPUDeviceContext, int>;
@@ -42,18 +41,16 @@ template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
 template struct SetConstant<platform::CPUDeviceContext, uint8_t>;

-#define DEFINE_CPU_TRANS(RANK)                                              \
-  template struct Transpose<platform::CPUDeviceContext, platform::float16,  \
-                            RANK>;                                          \
-  template struct Transpose<platform::CPUDeviceContext, platform::bfloat16, \
-                            RANK>;                                          \
-  template struct Transpose<platform::CPUDeviceContext, float, RANK>;       \
-  template struct Transpose<platform::CPUDeviceContext, double, RANK>;      \
-  template struct Transpose<platform::CPUDeviceContext, int, RANK>;         \
-  template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>;     \
-  template struct Transpose<platform::CPUDeviceContext, bool, RANK>;        \
-  template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>;     \
-  template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>;     \
+#define DEFINE_CPU_TRANS(RANK)                                             \
+  template struct Transpose<platform::CPUDeviceContext, platform::float16, \
+                            RANK>;                                         \
+  template struct Transpose<platform::CPUDeviceContext, float, RANK>;      \
+  template struct Transpose<platform::CPUDeviceContext, double, RANK>;     \
+  template struct Transpose<platform::CPUDeviceContext, int, RANK>;        \
+  template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>;    \
+  template struct Transpose<platform::CPUDeviceContext, bool, RANK>;       \
+  template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>;    \
+  template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>;    \
   template struct Transpose<platform::CPUDeviceContext, int8_t, RANK>;

 DEFINE_CPU_TRANS(1);
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index ef827fd74903af..652b4dd47daa8a 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -136,8 +136,6 @@ cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
 nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor)
 cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor)

-cc_test(bfloat16_test SRCS bfloat16_test.cc DEPS lod_tensor)
-
 nv_test(test_limit_gpu_memory SRCS test_limit_gpu_memory.cu DEPS gpu_info flags)

 nv_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info)
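`SetConstant` and `Transpose` above use explicit template instantiation: the definitions live in math_function.cc, and one `template struct ...;` line per element type forces code generation there, so other translation units can link against it with only a declaration. Dropping a dtype therefore means deleting its instantiation lines. A minimal sketch, with `SetConstantSketch` as an invented stand-in:

```cpp
#include <cstddef>

template <typename T>
struct SetConstantSketch {
  void operator()(T* data, std::size_t n, T value) const {
    for (std::size_t i = 0; i < n; ++i) data[i] = value;  // fill the buffer
  }
};

// One line per supported element type: emits the code in this TU so that
// callers elsewhere only need `extern template` / a declaration.
template struct SetConstantSketch<float>;
template struct SetConstantSketch<double>;
template struct SetConstantSketch<int>;
```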
diff --git a/paddle/fluid/platform/bfloat16.h b/paddle/fluid/platform/bfloat16.h
deleted file mode 100644
index 742329abb2dae2..00000000000000
--- a/paddle/fluid/platform/bfloat16.h
+++ /dev/null
@@ -1,439 +0,0 @@
-// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <stdint.h>
-#include <limits>
-
-#if !defined(_WIN32)
-#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
-#else
-#define PADDLE_ALIGN(x) __declspec(align(x))
-#endif
-
-#include <cstring>
-#include "paddle/fluid/platform/hostdevice.h"
-#include "unsupported/Eigen/CXX11/Tensor"
-
-namespace paddle {
-namespace platform {
-
-struct PADDLE_ALIGN(2) bfloat16 {
- public:
-  uint16_t x;
-
-  bfloat16() = default;
-  bfloat16(const bfloat16& o) = default;
-  bfloat16& operator=(const bfloat16& o) = default;
-  bfloat16(bfloat16&& o) = default;
-  bfloat16& operator=(bfloat16&& o) = default;
-  ~bfloat16() = default;
-
-  HOSTDEVICE inline explicit bfloat16(float val) {
-    std::memcpy(&x, reinterpret_cast<char*>(&val) + 2, 2);
-  }
-
-  template <class T>
-  HOSTDEVICE inline explicit bfloat16(const T& val)
-      : x(bfloat16(static_cast<float>(val)).x) {}
-
-  HOSTDEVICE inline bfloat16& operator=(bool b) {
-    x = b ? 0x3f80 : 0;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(int8_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(uint8_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(int16_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(uint16_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(int32_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(uint32_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(int64_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(uint64_t val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(float val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline bfloat16& operator=(double val) {
-    x = bfloat16(val).x;
-    return *this;
-  }
-
-  HOSTDEVICE inline explicit operator float() const {
-    float val = 0.f;
-    uint16_t temp = x;
-    memcpy(reinterpret_cast<char*>(&val) + 2, reinterpret_cast<char*>(&temp),
-           2);
-    return val;
-  }
-
-  HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; }
-
-  HOSTDEVICE inline explicit operator int8_t() const {
-    return static_cast<int8_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator uint8_t() const {
-    return static_cast<uint8_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator int16_t() const {
-    return static_cast<int16_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator uint16_t() const {
-    return static_cast<uint16_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator int32_t() const {
-    return static_cast<int32_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator uint32_t() const {
-    return static_cast<uint32_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator int64_t() const {
-    return static_cast<int64_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator uint64_t() const {
-    return static_cast<uint64_t>(static_cast<float>(*this));
-  }
-
-  HOSTDEVICE inline explicit operator double() const {
-    return static_cast<double>(static_cast<float>(*this));
-  }
-};
-
-HOSTDEVICE inline bfloat16 operator+(const bfloat16& a, const bfloat16& b) {
-  return bfloat16(static_cast<float>(a) + static_cast<float>(b));
-}
-
-HOSTDEVICE inline bfloat16 operator-(const bfloat16& a, const bfloat16& b) {
-  return bfloat16(static_cast<float>(a) - static_cast<float>(b));
-}
-
-HOSTDEVICE inline bfloat16 operator*(const bfloat16& a, const bfloat16& b) {
-  return bfloat16(static_cast<float>(a) * static_cast<float>(b));
-}
-
-HOSTDEVICE inline bfloat16 operator/(const bfloat16& a, const bfloat16& b) {
-  return bfloat16(static_cast<float>(a) / static_cast<float>(b));
-}
-
-HOSTDEVICE inline bfloat16 operator-(const bfloat16& a) {
-  bfloat16 res;
-  res.x = a.x ^ 0x8000;
-  return res;
-}
-
-HOSTDEVICE inline bfloat16& operator+=(bfloat16& a,  // NOLINT
-                                       const bfloat16& b) {
-  a = bfloat16(static_cast<float>(a) + static_cast<float>(b));
-  return a;
-}
-
-HOSTDEVICE inline bfloat16& operator-=(bfloat16& a,  // NOLINT
-                                       const bfloat16& b) {
-  a = bfloat16(static_cast<float>(a) - static_cast<float>(b));
-  return a;
-}
-
-HOSTDEVICE inline bfloat16& operator*=(bfloat16& a,  // NOLINT
-                                       const bfloat16& b) {
-  a = bfloat16(static_cast<float>(a) * static_cast<float>(b));
-  return a;
-}
-
-HOSTDEVICE inline bfloat16& operator/=(bfloat16& a,  // NOLINT
-                                       const bfloat16& b) {
-  a = bfloat16(static_cast<float>(a) / static_cast<float>(b));
-  return a;
-}
-
-HOSTDEVICE inline bfloat16 raw_uint16_to_bfloat16(uint16_t a) {
-  bfloat16 res;
-  res.x = a;
-  return res;
-}
-
-HOSTDEVICE inline bool operator==(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) == static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool operator!=(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) != static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool operator<(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) < static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool operator<=(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) <= static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool operator>(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) > static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool operator>=(const bfloat16& a, const bfloat16& b) {
-  return static_cast<float>(a) >= static_cast<float>(b);
-}
-
-HOSTDEVICE inline bool(isnan)(const bfloat16& a) {
-  return (a.x & 0x7FFF) > 0x7F80;
-}
-
-HOSTDEVICE inline bool(isinf)(const bfloat16& a) {
-  return (a.x & 0x7F80) == 0x7F80;
-}
-
-HOSTDEVICE inline bool(isfinite)(const bfloat16& a) {
-  return !((isnan)(a)) && !((isinf)(a));
-}
-
-inline std::ostream& operator<<(std::ostream& os, const bfloat16& a) {
-  os << a.x;
-  return os;
-}
-
-}  // namespace platform
-}  // namespace paddle
-
-namespace std {
-
-template <>
-struct is_pod<paddle::platform::bfloat16> {
-  static const bool value =
-      is_trivial<paddle::platform::bfloat16>::value &&
-      is_standard_layout<paddle::platform::bfloat16>::value;
-};
-
-template <>
-struct is_floating_point<paddle::platform::bfloat16>
-    : std::integral_constant<
-          bool, std::is_same<paddle::platform::bfloat16,
-                             typename std::remove_cv<
-                                 paddle::platform::bfloat16>::type>::value> {};
-template <>
-struct is_signed<paddle::platform::bfloat16> {
-  static const bool value = true;
-};
-
-template <>
-struct is_unsigned<paddle::platform::bfloat16> {
-  static const bool value = false;
-};
-
-inline bool isnan(const paddle::platform::bfloat16& a) {
-  return paddle::platform::isnan(a);
-}
-
-inline bool isinf(const paddle::platform::bfloat16& a) {
-  return paddle::platform::isinf(a);
-}
-
-template <>
-struct numeric_limits<paddle::platform::bfloat16> {
-  static const bool is_specialized = true;
-  static const bool is_signed = true;
-  static const bool is_integer = false;
-  static const bool is_exact = false;
-  static const bool has_infinity = true;
-  static const bool has_quiet_NaN = true;
-  static const bool has_signaling_NaN = true;
-  static const float_denorm_style has_denorm = denorm_present;
-  static const bool has_denorm_loss = false;
-  static const std::float_round_style round_style = std::round_to_nearest;
-  static const bool is_iec559 = false;
-  static const bool is_bounded = false;
-  static const bool is_modulo = false;
-  static const int digits = 8;
-  static const int digits10 = 2;
-  static const int max_digits10 = 9;
-  static const int radix = 2;
-  static const int min_exponent = -125;
-  static const int min_exponent10 = -37;
-  static const int max_exponent = 128;
-  static const int max_exponent10 = 38;
-  static const bool traps = true;
-  static const bool tinyness_before = false;
-
-  static paddle::platform::bfloat16(min)() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x007f);
-  }
-  static paddle::platform::bfloat16 lowest() {
-    return paddle::platform::raw_uint16_to_bfloat16(0xff7f);
-  }
-  static paddle::platform::bfloat16(max)() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x7f7f);
-  }
-  static paddle::platform::bfloat16 epsilon() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x3400);
-  }
-  static paddle::platform::bfloat16 round_error() {
-    return paddle::platform::bfloat16(0.5);
-  }
-  static paddle::platform::bfloat16 infinity() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x7f80);
-  }
-  static paddle::platform::bfloat16 quiet_NaN() {
-    return paddle::platform::raw_uint16_to_bfloat16(0xffc1);
-  }
-  static paddle::platform::bfloat16 signaling_NaN() {
-    return paddle::platform::raw_uint16_to_bfloat16(0xff81);
-  }
-  static paddle::platform::bfloat16 denorm_min() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x0001);
-  }
-};
-
-}  // namespace std
-
-namespace Eigen {
-
-using bfloat16 = paddle::platform::bfloat16;
-
-template <>
-struct NumTraits<bfloat16> : GenericNumTraits<bfloat16> {
-  enum {
-    IsSigned = true,
-    IsInteger = false,
-    IsComplex = false,
-    RequireInitialization = false
-  };
-
-  HOSTDEVICE static inline bfloat16 epsilon() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x3400);
-  }
-  HOSTDEVICE static inline bfloat16 dummy_precision() {
-    return bfloat16(1e-5f);
-  }
-  HOSTDEVICE static inline bfloat16 highest() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x7f7f);
-  }
-  HOSTDEVICE static inline bfloat16 lowest() {
-    return paddle::platform::raw_uint16_to_bfloat16(0xff7f);
-  }
-  HOSTDEVICE static inline bfloat16 infinity() {
-    return paddle::platform::raw_uint16_to_bfloat16(0x7f80);
-  }
-  HOSTDEVICE static inline bfloat16 quiet_NaN() {
-    return paddle::platform::raw_uint16_to_bfloat16(0xffc1);
-  }
-};
-namespace numext {
-
-template <>
-HOSTDEVICE inline bool(isnan)(const bfloat16& a) {
-  return (paddle::platform::isnan)(a);
-}
-
-template <>
-HOSTDEVICE inline bool(isinf)(const bfloat16& a) {
-  return (paddle::platform::isinf)(a);
-}
-
-template <>
-HOSTDEVICE inline bool(isfinite)(const bfloat16& a) {
-  return (paddle::platform::isfinite)(a);
-}
-
-template <>
-HOSTDEVICE inline bfloat16 exp(const bfloat16& a) {
-  return bfloat16(::expf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 erf(const bfloat16& a) {
-  return bfloat16(::erff(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 log(const bfloat16& a) {
-  return bfloat16(::logf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 tanh(const bfloat16& a) {
-  return bfloat16(::tanhf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 sqrt(const bfloat16& a) {
-  return bfloat16(::sqrtf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 ceil(const bfloat16& a) {
-  return bfloat16(::ceilf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 floor(const bfloat16& a) {
-  return bfloat16(::floorf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 round(const bfloat16& a) {
-  return bfloat16(::roundf(static_cast<float>(a)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 pow(const bfloat16& a, const bfloat16& b) {
-  return bfloat16(::powf(static_cast<float>(a), static_cast<float>(b)));
-}
-
-template <>
-HOSTDEVICE inline bfloat16 abs(const bfloat16& a) {
-  return bfloat16(::fabs(static_cast<float>(a)));
-}
-
-}  // namespace numext
-}  // namespace Eigen
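For reference, the deleted `isnan`/`isinf` work on raw bits: bfloat16 has 1 sign, 8 exponent, and 7 mantissa bits, so the magnitude mask is 0x7fff and the infinity pattern is 0x7f80. The sketch below reproduces those tests standalone; note one deliberate difference: the deleted header's `isinf` checks only the exponent bits (`(a.x & 0x7F80) == 0x7F80`), which also matches NaN patterns, while this sketch masks the mantissa too so NaN is excluded.

```cpp
#include <cassert>
#include <cstdint>

// NaN: exponent all ones, mantissa non-zero -> magnitude above 0x7f80.
inline bool Bf16IsNan(uint16_t x) { return (x & 0x7fff) > 0x7f80; }
// Inf: exponent all ones, mantissa zero -> magnitude exactly 0x7f80.
inline bool Bf16IsInf(uint16_t x) { return (x & 0x7fff) == 0x7f80; }

int main() {
  assert(Bf16IsInf(0x7f80));   // +inf
  assert(Bf16IsInf(0xff80));   // -inf
  assert(Bf16IsNan(0x7fc0));   // a quiet NaN
  assert(!Bf16IsNan(0x3f80));  // 1.0 is neither
  return 0;
}
```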
diff --git a/paddle/fluid/platform/bfloat16_test.cc b/paddle/fluid/platform/bfloat16_test.cc
deleted file mode 100644
index bdb508ee336300..00000000000000
--- a/paddle/fluid/platform/bfloat16_test.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/platform/bfloat16.h"
-
-#include <vector>
-
-#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
-#include "gtest/gtest.h"
-#include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/init.h"
-
-namespace paddle {
-namespace platform {
-
-using bfloat16 = paddle::platform::bfloat16;
-
-TEST(bfloat16, conversion_cpu) {
-  // Conversion from float
-  EXPECT_EQ(bfloat16(1.0f).x, 0x3f80);
-  EXPECT_EQ(bfloat16(0.5f).x, 0x3f00);
-  EXPECT_EQ(bfloat16(0.33333f).x, 0x3eaa);
-  EXPECT_EQ(bfloat16(0.0f).x, 0x0000);
-  EXPECT_EQ(bfloat16(-0.0f).x, 0x8000);
-  EXPECT_EQ(bfloat16(65504.0f).x, 0x477f);
-  EXPECT_EQ(bfloat16(65536.0f).x, 0x4780);
-
-  // Conversion from double
-  EXPECT_EQ(bfloat16(1.0).x, 0x3f80);
-  EXPECT_EQ(bfloat16(0.5).x, 0x3f00);
-  EXPECT_EQ(bfloat16(0.33333).x, 0x3eaa);
-  EXPECT_EQ(bfloat16(0.0).x, 0x0000);
-  EXPECT_EQ(bfloat16(-0.0).x, 0x8000);
-  EXPECT_EQ(bfloat16(65504.0).x, 0x477f);
-  EXPECT_EQ(bfloat16(65536.0).x, 0x4780);
-
-  // Conversion from int
-  EXPECT_EQ(bfloat16(-1).x, 0xbf80);
-  EXPECT_EQ(bfloat16(0).x, 0x0000);
-  EXPECT_EQ(bfloat16(1).x, 0x3f80);
-  EXPECT_EQ(bfloat16(2).x, 0x4000);
-  EXPECT_EQ(bfloat16(3).x, 0x4040);
-
-  // Conversion from bool
-  EXPECT_EQ(bfloat16(true).x, 0x3f80);
-  EXPECT_EQ(bfloat16(false).x, 0x0000);
-
-  // Assignment operator
-  bfloat16 v_assign;
-  v_assign = bfloat16(0.f);
-  EXPECT_EQ(v_assign.x, 0x0000);
-  v_assign = 0.5f;
-  EXPECT_EQ(v_assign.x, 0x3f00);
-  v_assign = 0.33333;
-  EXPECT_EQ(v_assign.x, 0x3eaa);
-  v_assign = -1;
-  EXPECT_EQ(v_assign.x, 0xbf80);
-
-  // Conversion operator
-  EXPECT_EQ(static_cast<float>(bfloat16(0.5f)), 0.5f);
-  EXPECT_NEAR(static_cast<double>(bfloat16(0.33333)), 0.33333, 0.01);
-  EXPECT_EQ(static_cast<int>(bfloat16(-1)), -1);
-  EXPECT_EQ(static_cast<bool>(bfloat16(true)), true);
-}
-
-TEST(bfloat16, arithmetic_cpu) {
-  EXPECT_NEAR(static_cast<float>(bfloat16(1) + bfloat16(1)), 2, 0.001);
-  EXPECT_EQ(static_cast<float>(bfloat16(5) + bfloat16(-5)), 0);
-  EXPECT_NEAR(static_cast<float>(bfloat16(0.33333f) + bfloat16(0.66667f)), 1.0f,
-              0.01);
-  EXPECT_EQ(static_cast<float>(bfloat16(3) - bfloat16(5)), -2);
-  EXPECT_NEAR(static_cast<float>(bfloat16(0.66667f) - bfloat16(0.33333f)),
-              0.33334f, 0.01);
-  EXPECT_NEAR(static_cast<float>(bfloat16(3.3f) * bfloat16(2.0f)), 6.6f, 0.01);
-  EXPECT_NEAR(static_cast<float>(bfloat16(-2.1f) * bfloat16(-3.0f)), 6.3f, 0.1);
-  EXPECT_NEAR(static_cast<float>(bfloat16(2.0f) / bfloat16(3.0f)), 0.66667f,
-              0.01);
-  EXPECT_EQ(static_cast<float>(bfloat16(1.0f) / bfloat16(2.0f)), 0.5f);
-  EXPECT_EQ(static_cast<float>(-bfloat16(512.0f)), -512.0f);
-  EXPECT_EQ(static_cast<float>(-bfloat16(-512.0f)), 512.0f);
-}
-
-TEST(bfloat16, comparison_cpu) {
-  EXPECT_TRUE(bfloat16(1.0f) == bfloat16(1.0f));
-  EXPECT_FALSE(bfloat16(-1.0f) == bfloat16(-0.5f));
-  EXPECT_TRUE(bfloat16(1.0f) != bfloat16(0.5f));
-  EXPECT_FALSE(bfloat16(-1.0f) != bfloat16(-1.0f));
-  EXPECT_TRUE(bfloat16(1.0f) < bfloat16(2.0f));
-  EXPECT_FALSE(bfloat16(-1.0f) < bfloat16(-1.0f));
-  EXPECT_TRUE(bfloat16(1.0f) <= bfloat16(1.0f));
-  EXPECT_TRUE(bfloat16(2.0f) > bfloat16(1.0f));
-  EXPECT_FALSE(bfloat16(-2.0f) > bfloat16(-2.0f));
-  EXPECT_TRUE(bfloat16(2.0f) >= bfloat16(2.0f));
-}
-
-TEST(bfloat16, lod_tensor_cpu) {
-  framework::LoDTensor lod_tensor;
-
-  std::vector<bfloat16> input_data = {bfloat16(1.0f), bfloat16(0.5f),
-                                      bfloat16(0.33333f), bfloat16(0.0f)};
-  EXPECT_EQ(input_data[0].x, 0x3f80);
-  EXPECT_EQ(input_data[1].x, 0x3f00);
-  EXPECT_EQ(input_data[2].x, 0x3eaa);
-  EXPECT_EQ(input_data[3].x, 0x0000);
-
-  lod_tensor.Resize({4, 1});
-  lod_tensor.set_lod(framework::LoD({{0, 2, 4}}));
-  bfloat16* data_ptr = lod_tensor.mutable_data<bfloat16>(CPUPlace());
-
-  EXPECT_NE(data_ptr, nullptr);
-  EXPECT_EQ(input_data.size(), static_cast<size_t>(lod_tensor.numel()));
-  for (size_t i = 0; i < input_data.size(); ++i) {
-    data_ptr[i] = input_data[i];
-    EXPECT_EQ(data_ptr[i].x, input_data[i].x);
-  }
-}
-
-TEST(bfloat16, floating) {
-  // compile time assert.
-  PADDLE_ENFORCE_EQ(
-      std::is_floating_point<bfloat16>::value, true,
-      platform::errors::Fatal("std::is_floating_point with bfloat16 data type "
-                              "should be equal to true but it is not"));
-}
-
-TEST(bfloat16, print) {
-  bfloat16 a = bfloat16(1.0f);
-  std::cout << a << std::endl;
-}
-
-// CPU test
-TEST(bfloat16, isinf) {
-  bfloat16 a;
-  a.x = 0x7f80;
-  bfloat16 b = bfloat16(INFINITY);
-  bfloat16 c = static_cast<bfloat16>(INFINITY);
-  EXPECT_EQ(std::isinf(a), true);
-  EXPECT_EQ(std::isinf(b), true);
-  EXPECT_EQ(std::isinf(c), true);
-}
-
-TEST(bfloat16, isnan) {
-  bfloat16 a;
-  a.x = 0x7fff;
-  bfloat16 b = bfloat16(NAN);
-  bfloat16 c = static_cast<bfloat16>(NAN);
-  EXPECT_EQ(std::isnan(a), true);
-  EXPECT_EQ(std::isnan(b), true);
-  EXPECT_EQ(std::isnan(c), true);
-}
-
-}  // namespace platform
-}  // namespace paddle
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index 8fb66c6f34bd84..3782eb684f21f8 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -161,12 +161,6 @@ inline mkldnn::memory::data_type MKLDNNGetDataType<uint8_t>() {
   return mkldnn::memory::data_type::u8;
 }

-template <>
-inline mkldnn::memory::data_type
-MKLDNNGetDataType<paddle::platform::bfloat16>() {
-  return mkldnn::memory::data_type::bf16;
-}
-
 inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
                     const mkldnn::engine& engine) {
   auto reorder_prim = mkldnn::reorder(src, dst);
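`MKLDNNGetDataType` above maps a C++ element type to a oneDNN enum via full function-template specialization. A standalone sketch of that pattern with invented names (in Paddle the unspecialized template falls back to an `undef` value, as assumed here):

```cpp
#include <cstdint>

enum class DnnDType { undef, f32, s8, u8, s32 };

// Primary template: unknown element types map to undef.
template <typename T>
inline DnnDType GetDnnDType() {
  return DnnDType::undef;
}

// One full specialization per supported element type.
template <>
inline DnnDType GetDnnDType<float>() {
  return DnnDType::f32;
}
template <>
inline DnnDType GetDnnDType<int8_t>() {
  return DnnDType::s8;
}
template <>
inline DnnDType GetDnnDType<uint8_t>() {
  return DnnDType::u8;
}
template <>
inline DnnDType GetDnnDType<int32_t>() {
  return DnnDType::s32;
}
```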
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 5ee15073267b6e..4377a8c2cef5aa 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -26,7 +26,6 @@ limitations under the License. */
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
-#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/float16.h"
 #include "pybind11/numpy.h"
@@ -105,7 +104,6 @@ struct ValidDTypeToPyArrayChecker {
   }

 DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::float16);
-DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::bfloat16);
 DECLARE_VALID_DTYPE_TO_PY_ARRAY(float);
 DECLARE_VALID_DTYPE_TO_PY_ARRAY(double);
 DECLARE_VALID_DTYPE_TO_PY_ARRAY(bool);
@@ -121,9 +119,6 @@ inline std::string TensorDTypeToPyDTypeStr(
   if (type == proto_type) {                                                 \
     if (std::is_same<T, platform::float16>::value) {                        \
       return "e";                                                           \
-    } else if (std::is_same<T, platform::bfloat16>::value) {                \
-      /* NumPy character code of uint16 due to no support for bfloat16 */   \
-      return "H";                                                           \
     } else {                                                                \
       constexpr auto kIsValidDType = ValidDTypeToPyArrayChecker<T>::kValue; \
       PADDLE_ENFORCE_EQ(                                                    \
@@ -267,10 +262,10 @@ void SetTensorFromPyArray(framework::Tensor *self, const py::object &obj,
     SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place,
                                                         zero_copy);
   } else if (py::isinstance<py::array_t<uint16_t>>(array)) {
-    // since there is still no support for bfloat16 in NumPy,
-    // uint16 is used for casting bfloat16
-    SetTensorFromPyArrayT<paddle::platform::bfloat16, P>(self, array, place,
-                                                         zero_copy);
+    // TODO(cql): temporarily keeping uint16, which was used for casting
+    // float16 before. It should be deprecated later.
+    SetTensorFromPyArrayT<paddle::platform::float16, P>(self, array, place,
+                                                        zero_copy);
   } else if (py::isinstance<py::array_t<uint8_t>>(array)) {
     SetTensorFromPyArrayT<uint8_t, P>(self, array, place, zero_copy);
   } else {
@@ -484,8 +479,6 @@ inline framework::Tensor *_sliceTensor(const framework::Tensor &self,
   switch (src_type) {
     case framework::proto::VarType::FP16:
       return _sliceAndConcat<paddle::platform::float16>(self, obj, dim);
-    case framework::proto::VarType::BF16:
-      return _sliceAndConcat<paddle::platform::bfloat16>(self, obj, dim);
     case framework::proto::VarType::FP32:
       return _sliceAndConcat<float>(self, obj, dim);
     case framework::proto::VarType::FP64: