From 5c9aaf7d5c5bde858f567857e309e12ceeab0a1f Mon Sep 17 00:00:00 2001 From: JamesLim-sy Date: Thu, 7 Jul 2022 02:18:58 +0000 Subject: [PATCH 1/4] first commit --- paddle/phi/kernels/autotune/auto_tune_base.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/paddle/phi/kernels/autotune/auto_tune_base.h b/paddle/phi/kernels/autotune/auto_tune_base.h index 91685c2ed547c..3fd3b090c77b5 100644 --- a/paddle/phi/kernels/autotune/auto_tune_base.h +++ b/paddle/phi/kernels/autotune/auto_tune_base.h @@ -153,20 +153,15 @@ template class TransposeAutoTuner : public AutoTuneBase { public: static AutoTuneBase* Instance(KernelType kernel) { + static std::once_flag transpose_init_flag_; static std::unique_ptr> instance_; - std::call_once(init_flag_, [&] { + std::call_once(transpose_init_flag_, [&] { instance_.reset(new AutoTuneBase(kernel)); }); return instance_.get(); } - - private: - static std::once_flag init_flag_; }; -template -std::once_flag TransposeAutoTuner::init_flag_; - template static AutoTuneBase>* MakeTransposeTuner(RetureType (*func)(Args...)) { From 6d2d61587ede9ea97d5eacd167968b7152b80503 Mon Sep 17 00:00:00 2001 From: JamesLim-sy Date: Tue, 19 Jul 2022 02:18:00 +0000 Subject: [PATCH 2/4] clarify the quotes --- paddle/phi/kernels/autotune/auto_tune_base.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/phi/kernels/autotune/auto_tune_base.h b/paddle/phi/kernels/autotune/auto_tune_base.h index 3fd3b090c77b5..4831ea3e55485 100644 --- a/paddle/phi/kernels/autotune/auto_tune_base.h +++ b/paddle/phi/kernels/autotune/auto_tune_base.h @@ -120,8 +120,7 @@ class AutoTuneBase { template float RunAndMeasureKernel(const Context& ctx, const int idx, Args&&... args) { - // Regard 1st run as warmup. Judge the result by the time cost of rest run - // cycles. + // Regard 1st run as warmup, judge the result by the time cost of rest cycles. constexpr int repeats = 3; phi::GpuTimer timer; float time_cost = 0; From 66db14cd69e65834e04b5a960bb99cfbd92d55a7 Mon Sep 17 00:00:00 2001 From: JamesLim-sy Date: Tue, 19 Jul 2022 02:52:21 +0000 Subject: [PATCH 3/4] change code style format --- paddle/phi/kernels/autotune/auto_tune_base.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/phi/kernels/autotune/auto_tune_base.h b/paddle/phi/kernels/autotune/auto_tune_base.h index 4831ea3e55485..054006d84934f 100644 --- a/paddle/phi/kernels/autotune/auto_tune_base.h +++ b/paddle/phi/kernels/autotune/auto_tune_base.h @@ -120,7 +120,8 @@ class AutoTuneBase { template float RunAndMeasureKernel(const Context& ctx, const int idx, Args&&... args) { - // Regard 1st run as warmup, judge the result by the time cost of rest cycles. + // Regard 1st run as warmup and judge the compare result by the time cost + // of rest cycles. constexpr int repeats = 3; phi::GpuTimer timer; float time_cost = 0; From e8419fe61f382e7f5b2d3762444a53155a9f9f60 Mon Sep 17 00:00:00 2001 From: JamesLim-sy Date: Thu, 22 Sep 2022 19:14:42 +0800 Subject: [PATCH 4/4] support bfloat16 --- paddle/phi/kernels/kps/compare_kernel.cu | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/paddle/phi/kernels/kps/compare_kernel.cu b/paddle/phi/kernels/kps/compare_kernel.cu index b981d802255a2..b882fcc2a6c96 100644 --- a/paddle/phi/kernels/kps/compare_kernel.cu +++ b/paddle/phi/kernels/kps/compare_kernel.cu @@ -114,7 +114,8 @@ PD_REGISTER_KERNEL(less_than, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(less_equal, KPS, ALL_LAYOUT, @@ -125,7 +126,8 @@ PD_REGISTER_KERNEL(less_equal, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(greater_than, KPS, ALL_LAYOUT, @@ -136,7 +138,8 @@ PD_REGISTER_KERNEL(greater_than, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(greater_equal, KPS, ALL_LAYOUT, @@ -147,7 +150,8 @@ PD_REGISTER_KERNEL(greater_equal, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(equal, KPS, ALL_LAYOUT, @@ -158,7 +162,8 @@ PD_REGISTER_KERNEL(equal, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(not_equal, KPS, ALL_LAYOUT, @@ -169,7 +174,8 @@ PD_REGISTER_KERNEL(not_equal, int64_t, float, double, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(equal_all, KPS,