Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PaddleSpeech] Add OPs and others needed by fastspeech_2 model #9706

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
351 changes: 351 additions & 0 deletions lite/backends/arm/math/reduce_sum.cc

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions lite/core/optimizer/mir/memory_optimize_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ void MemoryOptimizePass::CollectLifeCycleByDevice(
"fetch",
"cast",
"expand",
"share_data",
};

auto insert_invalid_op_nodes_for_specific_target = [&](
Expand Down
5 changes: 5 additions & 0 deletions lite/kernels/arm/reduce_max_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,8 @@ REGISTER_LITE_KERNEL(reduce_max, kARM, kFloat, kNCHW, int64_reduce_max, i64)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.Finalize();
// Register reduce_max for int32 tensors on ARM, under the kFloat kernel
// umbrella with alias "i32" — mirrors the int64 ("i64") registration above.
// Both X and Out are bound to kInt32 since reduce_max preserves element type.
using int32_reduce_max = paddle::lite::kernels::arm::ReduceMaxCompute<int32_t>;
REGISTER_LITE_KERNEL(reduce_max, kARM, kFloat, kNCHW, int32_reduce_max, i32)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
.Finalize();
11 changes: 10 additions & 1 deletion lite/kernels/arm/reduce_sum_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ void ReduceSumCompute<T, Ptype>::Run() {
if (x_vec.size() >= 5 && x_vec[0] == 1) {
x_vec.erase(x_vec.begin());
for (auto& val : dim) val--;
} else
} else {
break;
}
}
auto x_dims = lite::DDim(x_vec);

Expand Down Expand Up @@ -116,6 +117,8 @@ using reduce_sum_arm_int32 =
paddle::lite::kernels::arm::ReduceSumCompute<int, PRECISION(kFloat)>;
using reduce_sum_arm_float =
paddle::lite::kernels::arm::ReduceSumCompute<float, PRECISION(kFloat)>;
using reduce_sum_arm_int64 =
paddle::lite::kernels::arm::ReduceSumCompute<int64_t, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(
reduce_sum, kARM, kFloat, kNCHW, reduce_sum_arm_int32, def_int32)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
Expand All @@ -126,3 +129,9 @@ REGISTER_LITE_KERNEL(reduce_sum, kARM, kFloat, kNCHW, reduce_sum_arm_float, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
.Finalize();

// Register reduce_sum for int64 tensors on ARM (alias "def_int64"),
// parallel to the existing def_int32 and def (float) registrations above.
// Element type is preserved: both X and Out are bound to kInt64.
REGISTER_LITE_KERNEL(
reduce_sum, kARM, kFloat, kNCHW, reduce_sum_arm_int64, def_int64)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.Finalize();
3 changes: 3 additions & 0 deletions lite/kernels/host/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ add_kernel(merge_lod_tensor_compute Host extra SRCS merge_lod_tensor_compute.cc)
add_kernel(im2sequence_compute Host extra SRCS im2sequence_compute.cc)
add_kernel(log_softmax_compute Host extra SRCS log_softmax_compute.cc)
add_kernel(roll_compute Host extra SRCS roll_compute.cc)
add_kernel(set_value Host extra SRCS set_value_compute.cc)
add_kernel(share_data_compute_host Host extra SRCS share_data_compute.cc)
add_kernel(round_compute_host Host extra SRCS round_compute.cc)

if(LITE_BUILD_EXTRA AND LITE_WITH_x86)
lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc)
Expand Down
3 changes: 3 additions & 0 deletions lite/kernels/host/elementwise_op_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#pragma once

#include <cmath>
#include <cstring>
#include <functional>
#include <utility>
Expand Down Expand Up @@ -410,6 +411,7 @@ void BatchElementWiseArg<Elem_t, DimValue_t>::Update(
}
break;
}

default: {
return; // code should never goes to here
}
Expand Down Expand Up @@ -525,6 +527,7 @@ void common_elmentwise_op_naive_cpu(
}
break;
}
default: { return; }
}
}

Expand Down
21 changes: 21 additions & 0 deletions lite/kernels/host/expand_v2_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,27 @@ void ExpandV2Compute<T, PType>::Run() {
} // namespace lite
} // namespace paddle

// Register expand_v2 for bool tensors on Host (alias "def_bool").
// X and Out carry kBool data; the target shape can arrive either through the
// "Shape" tensor or the "expand_shapes_tensor" list, both of which are int32.
using expand_v2_bool =
paddle::lite::kernels::host::ExpandV2Compute<bool, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(expand_v2, kHost, kFloat, kAny, expand_v2_bool, def_bool)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.BindInput("Shape",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt32),
DATALAYOUT(kAny))})
.BindInput("expand_shapes_tensor",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kInt32),
DATALAYOUT(kAny))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost),
PRECISION(kBool),
DATALAYOUT(kAny))})
.Finalize();

using expand_v2_float =
paddle::lite::kernels::host::ExpandV2Compute<float, PRECISION(kFloat)>;
REGISTER_LITE_KERNEL(expand_v2, kHost, kFloat, kAny, expand_v2_float, def)
Expand Down
54 changes: 54 additions & 0 deletions lite/kernels/host/round_compute.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/round_compute.h"
#include "lite/kernels/host/elementwise_op_func.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

// Element-wise round: Out[i] = std::round(X[i]) for every element of X.
// std::round uses round-half-away-from-zero semantics. Output is resized to
// the input's dims and holds the same element type T.
template <typename T>
void RoundCompute<T>::Run() {
  auto& param = Param<operators::RoundParam>();
  const lite::Tensor* input = param.X;
  lite::Tensor* output = param.Out;

  output->Resize(input->dims());

  // Hoist the data pointers out of the loop: mutable_data<T>() may perform
  // an allocation/precision check on every call, and fetching it per element
  // was pure overhead in the original loop.
  const T* in_data = input->data<T>();
  T* out_data = output->mutable_data<T>();

  // production() returns int64_t; use the same type for the index so very
  // large tensors cannot overflow an int counter.
  const int64_t out_num = output->dims().production();
  for (int64_t i = 0; i < out_num; ++i) {
    out_data[i] = std::round(in_data[i]);
  }
}

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle

// Register the float round kernel on Host (alias "fp32").
// Fix: the kernel is instantiated as RoundCompute<float> and Run() writes
// float data via mutable_data<float>(), so Out must be bound as kFloat.
// The previous binding declared Out as kInt32, which contradicts the data
// the kernel actually produces and would mislead precision propagation.
REGISTER_LITE_KERNEL(round,
                     kHost,
                     kAny,
                     kNCHW,
                     paddle::lite::kernels::host::RoundCompute<float>,
                     fp32)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kFloat))})
    .BindPaddleOpVersion("round", 1)
    .Finalize();
35 changes: 35 additions & 0 deletions lite/kernels/host/round_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

// Host kernel that rounds each element of the input tensor to the nearest
// integral value (std::round semantics — see Run() in round_compute.cc,
// where the computation is defined). The element type T is preserved:
// the output tensor holds T values, resized to the input's dims.
template <typename T>
class RoundCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
// Parameter struct consumed by Run(); provides X (input) and Out (output).
using param_t = operators::RoundParam;
void Run() override;
virtual ~RoundCompute() = default;
};

} // namespace host
} // namespace kernels
} // namespace lite
} // namespace paddle
Loading