From 7e7e9404217fb1f2473f9e41d01dece67c45681f Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Tue, 15 Feb 2022 09:39:01 +0800
Subject: [PATCH] [PTen]Migrate proto::VarType outside of Pten (#39411)

* #1 migrate dist-related type()-> dtype()
* move datatype function from pten -> fluid/framework
* change type() in imperative into convert(dtype())
* modify xx_tensor->type into xx_tensor->dtype
* change the set_type interface and the caller
* modify xx_tensor.type into xx_tensor.dtype
* fix mutable_data(place, dtype())
* change caller of mutable_data in pten and distributed
* change the caller of mutable_data in fluid/framework
* change the caller of mutable_data in imperative directory
* mutable_data: inference
* update the call of mutable_data
* transfer MakePenScalarArray MakePtenScalar ResetHolderWithType
* pass the compile. the next step is remove VarType in Pten
* fix all and remove VarType from pten. success in linux. Next task is other platform
* fix conflict with develop
* fix compiled error
* Fix reset conversion
* fix conflict
* fix compiled problem
* fix typo
* Fix << in tensor_utils.cc
* fix type->dtype
* fix unittest
* fix tensor init constructor
* fix DataTypeSize for BFloat16
* fix code style
* fix npu compiled error
* fix npu
* compile npu successfully
* fix conflict
* fix conflict

Co-authored-by: xiongkun
---
 .../distributed/fleet_executor/dist_model.cc | 2 +- .../distributed/ps/service/brpc_utils.cc | 67 ++++--- .../distributed/ps/service/heter_client.cc | 6 +- paddle/fluid/eager/grad_tensor_holder.cc | 1 + paddle/fluid/framework/CMakeLists.txt | 6 + paddle/fluid/framework/convert_utils.cc | 185 ++++++++++++++++++ paddle/fluid/framework/convert_utils.h | 46 +++++ paddle/fluid/framework/custom_operator.cc | 15 +- .../fluid/framework/data_layout_transform.cc | 21 +- paddle/fluid/framework/data_type_test.cc | 11 +- paddle/fluid/framework/data_type_transform.cc | 17 +- .../framework/details/all_reduce_op_handle.cc | 10 +- .../framework/details/broadcast_op_handle.cc | 9 +- .../details/fetch_async_op_handle.cc | 11 +- .../details/fused_all_reduce_op_handle.cc | 4 +- .../framework/details/gather_op_handle.cc | 2 +- .../framework/details/nan_inf_utils_detail.cc | 24 ++- .../framework/details/nan_inf_utils_detail.cu | 3 +- .../framework/details/reduce_and_gather.h | 3 +- .../framework/details/reduce_op_handle.cc | 17 +- .../details/sparse_all_reduce_op_handle.cc | 5 +- .../framework/details/variable_visitor.cc | 3 +- paddle/fluid/framework/device_worker.cc | 10 +- paddle/fluid/framework/dist_multi_trainer.cc | 10 +- paddle/fluid/framework/dlpack_tensor.cc | 4 +- .../fluid/framework/executor_thread_worker.cc | 15 +- paddle/fluid/framework/fleet/ascend_wrapper.h | 12 +- paddle/fluid/framework/fleet/heter_wrapper.cc | 45 +++-- .../fluid/framework/heter_section_worker.cc | 18 +- paddle/fluid/framework/heterxpu_trainer.cc | 61 +++--- paddle/fluid/framework/hogwild_worker.cc | 12 +- paddle/fluid/framework/infershape_utils.cc | 5 +- .../ir/conv_affine_channel_fuse_pass.cc | 4 +- .../fluid/framework/ir/conv_bn_fuse_pass.cc | 7 +- .../framework/ir/layer_norm_fuse_pass.cc | 12 +- .../conv_bias_mkldnn_fuse_pass_tester.cc | 3 +- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 3 +- .../mkldnn/cpu_quantize_squash_pass_tester.cc | 3 +- paddle/fluid/framework/lod_tensor.cc | 13 +- paddle/fluid/framework/multi_trainer.cc | 10 +- .../framework/new_executor/data_transfer.cc | 5 +- paddle/fluid/framework/operator.cc | 22 ++- paddle/fluid/framework/operator.h | 5 +- .../paddle2cinn/cinn_graph_symbolization.cc
| 3 +- .../cinn_graph_symbolization_test.cc | 5 +- paddle/fluid/framework/parallel_executor.cc | 16 +- paddle/fluid/framework/ps_gpu_trainer.cc | 10 +- paddle/fluid/framework/pten_utils.cc | 5 +- paddle/fluid/framework/save_load_util.cc | 5 +- paddle/fluid/framework/tensor_util.cc | 46 +++-- paddle/fluid/framework/tensor_util.h | 6 +- paddle/fluid/imperative/all_reduce.cc | 10 +- paddle/fluid/imperative/basic_engine.cc | 4 +- paddle/fluid/imperative/bkcl_context.cc | 11 +- paddle/fluid/imperative/gloo_context.cc | 7 +- .../fluid/imperative/gradient_accumulator.cc | 29 ++- paddle/fluid/imperative/hccl_context.cc | 9 +- .../imperative/jit/program_desc_tracer.cc | 3 +- paddle/fluid/imperative/layer.cc | 10 +- paddle/fluid/imperative/nccl_context.cc | 4 +- .../fluid/imperative/partial_grad_engine.cc | 10 +- paddle/fluid/imperative/prepared_operator.h | 3 +- paddle/fluid/imperative/reducer.cc | 5 +- .../tests/test_gradient_accmulator.cc | 7 +- paddle/fluid/imperative/tests/test_group.cc | 4 +- paddle/fluid/imperative/var_helper.cc | 7 +- paddle/fluid/imperative/variable_wrapper.h | 3 +- .../fluid/inference/api/analysis_predictor.cc | 2 +- paddle/fluid/inference/api/api_impl.cc | 2 +- paddle/fluid/inference/api/api_impl_tester.cc | 10 +- .../inference/api/details/zero_copy_tensor.cc | 3 +- paddle/fluid/inference/capi/pd_predictor.cc | 9 +- paddle/fluid/inference/lite/tensor_utils.cc | 38 ++-- .../fluid/inference/tests/api/tester_helper.h | 4 +- paddle/fluid/operators/abs_op.cc | 5 +- paddle/fluid/operators/activation_op_mlu.cc | 25 ++- paddle/fluid/operators/activation_op_npu.cc | 9 +- paddle/fluid/operators/allclose_op.h | 22 ++- .../amp/update_loss_scaling_op_npu.cc | 6 +- paddle/fluid/operators/argsort_op_npu.cc | 13 +- .../fluid/operators/array_to_lod_tensor_op.cc | 9 +- paddle/fluid/operators/batch_norm_op.cc | 12 +- .../fluid/operators/beam_search_decode_op.cc | 3 +- paddle/fluid/operators/bincount_op.cu | 4 +- paddle/fluid/operators/bincount_op.h | 4 +- paddle/fluid/operators/cast_op.cc | 25 ++- paddle/fluid/operators/cast_op.h | 2 +- paddle/fluid/operators/cast_op_npu.cc | 2 +- paddle/fluid/operators/cast_op_xpu.cc | 3 +- .../fluid/operators/class_center_sample_op.cu | 5 +- paddle/fluid/operators/coalesce_tensor_op.cc | 6 +- .../fluid/operators/collective/allreduce_op.h | 3 +- .../operators/collective/alltoall_op.cu.cc | 3 +- .../operators/collective/barrier_op.cu.cc | 3 +- .../operators/collective/broadcast_op.cu.cc | 4 +- .../operators/collective/c_allgather_op.cu.cc | 4 +- .../collective/c_allgather_op_npu.cc | 3 +- .../operators/collective/c_allreduce_op.h | 11 +- .../operators/collective/c_broadcast_op.cu.cc | 3 +- .../collective/c_broadcast_op_npu.cc | 3 +- .../operators/collective/c_concat_op.cu.cc | 3 +- .../operators/collective/c_embedding_op.cu | 5 +- .../operators/collective/c_embedding_op.h | 10 +- .../collective/c_embedding_op_npu.cc | 17 +- .../fluid/operators/collective/c_reduce_op.h | 10 +- .../collective/c_reducescatter_op.cu.cc | 3 +- .../collective/c_reducescatter_op_npu.cc | 3 +- .../operators/collective/c_scatter_op.cu.cc | 3 +- .../c_softmax_with_cross_entropy_op.cu | 19 +- .../collective/global_gather_op.cu.cc | 10 +- .../collective/global_scatter_op.cu.cc | 10 +- .../collective/partial_allgather_op.cu.cc | 3 +- .../collective/partial_allgather_op_npu.cc | 3 +- .../collective/partial_recv_op_npu.cc | 3 +- .../collective/partial_send_op.cu.cc | 4 +- .../collective/partial_send_op_npu.cc | 3 +- .../operators/collective/recv_v2_op_npu.cc | 3 +- 
.../operators/collective/send_v2_op.cu.cc | 6 +- .../operators/collective/send_v2_op_npu.cc | 3 +- paddle/fluid/operators/concat_op.cc | 2 +- paddle/fluid/operators/concat_op_mlu.cc | 8 +- .../controlflow/conditional_block_op.cc | 2 +- .../controlflow/conditional_block_op.h | 3 +- .../controlflow/tensor_array_read_write_op.cc | 2 +- .../fluid/operators/controlflow/while_op.cc | 3 +- paddle/fluid/operators/conv_op.cc | 3 +- paddle/fluid/operators/conv_op.h | 20 +- paddle/fluid/operators/conv_op_mlu.cc | 10 +- paddle/fluid/operators/correlation_op.cc | 8 +- paddle/fluid/operators/crop_op_npu.cc | 2 +- paddle/fluid/operators/cumsum_op_npu.cc | 9 +- paddle/fluid/operators/data_norm_op.cu | 9 +- paddle/fluid/operators/decode_jpeg_op.cc | 5 +- paddle/fluid/operators/detection/bbox_util.h | 3 +- .../detection/density_prior_box_op_npu.cc | 9 +- .../detection/iou_similarity_op_npu.cc | 2 +- .../fluid/operators/detection/prior_box_op.cc | 4 +- .../fluid/operators/dlnne/dlnne_engine_op.h | 2 +- paddle/fluid/operators/dropout_op_npu.cc | 21 +- paddle/fluid/operators/eig_op.h | 2 +- paddle/fluid/operators/eigvals_op.h | 9 +- .../elementwise/elementwise_div_op.h | 5 +- .../elementwise/elementwise_div_op_npu.cc | 4 +- .../elementwise/elementwise_mul_op.h | 5 +- .../operators/elementwise/elementwise_op.h | 25 ++- .../mkldnn/elementwise_add_mkldnn_op.cc | 9 +- .../mkldnn/elementwise_sub_mkldnn_op.cc | 9 +- paddle/fluid/operators/empty_op.h | 4 +- paddle/fluid/operators/expand_v2_op_npu.cc | 10 +- .../fluid/operators/fill_any_like_op_npu.cc | 2 +- .../fill_constant_batch_size_like_op.h | 7 +- .../fill_constant_batch_size_like_op_npu.cc | 8 +- paddle/fluid/operators/fill_constant_op.h | 13 +- .../fluid/operators/fill_constant_op_mlu.cc | 5 +- .../fluid/operators/fill_constant_op_npu.cc | 2 +- paddle/fluid/operators/fill_diagonal_op.cc | 4 +- .../operators/fill_diagonal_tensor_op.cc | 3 +- paddle/fluid/operators/fill_op.h | 6 +- .../fluid/operators/fused/conv_fusion_op.cu | 4 +- .../operators/fused/fused_attention_op.cc | 4 +- .../operators/fused/fused_bn_activation_op.cc | 13 +- .../operators/fused/fused_bn_activation_op.cu | 4 +- .../fused/fused_bn_add_activation_op.cc | 6 +- .../fused/fused_bn_add_activation_op.cu | 6 +- .../fused/fused_elemwise_activation_op.cc | 4 +- .../fused_embedding_eltwise_layernorm_op.cc | 2 +- .../fused_embedding_eltwise_layernorm_op.cu | 7 +- .../operators/fused/fused_feedforward_op.cc | 2 +- .../fused/mkldnn/fusion_gru_mkldnn_op.cc | 6 +- .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 8 +- .../fluid/operators/fused/resnet_unit_op.cc | 6 +- paddle/fluid/operators/gather_nd_op.cu | 4 +- paddle/fluid/operators/gather_nd_op.h | 4 +- paddle/fluid/operators/gather_nd_op_npu.cc | 2 +- paddle/fluid/operators/gather_nd_op_xpu.cc | 2 +- paddle/fluid/operators/gather_op.cu | 11 +- paddle/fluid/operators/gather_op.h | 11 +- paddle/fluid/operators/gather_op_xpu.cc | 6 +- .../fluid/operators/gaussian_random_op_npu.cc | 3 +- paddle/fluid/operators/graph_send_recv_op.cu | 4 +- paddle/fluid/operators/graph_send_recv_op.h | 4 +- paddle/fluid/operators/group_norm_op.cc | 3 +- paddle/fluid/operators/increment_op_npu.cc | 2 +- paddle/fluid/operators/index_sample_op.cu | 4 +- paddle/fluid/operators/index_sample_op.h | 7 +- paddle/fluid/operators/index_sample_op_npu.cc | 4 +- paddle/fluid/operators/index_select_op.cu | 4 +- paddle/fluid/operators/index_select_op.h | 4 +- paddle/fluid/operators/index_select_op_npu.cc | 3 +- paddle/fluid/operators/inplace_abn_op.cc | 20 +- 
paddle/fluid/operators/instance_norm_op.cc | 6 +- .../fluid/operators/interpolate_v2_op_npu.cc | 5 +- paddle/fluid/operators/ipu/ipu_runtime_op.cc | 2 +- paddle/fluid/operators/isclose_op.h | 22 ++- paddle/fluid/operators/isfinite_op.cc | 6 +- paddle/fluid/operators/isfinite_v2_op.cc | 6 +- paddle/fluid/operators/kron_op.cc | 10 +- paddle/fluid/operators/layer_norm_op.cu | 20 +- paddle/fluid/operators/layer_norm_op_npu.cc | 117 +++++++---- paddle/fluid/operators/linspace_op.cu | 8 +- paddle/fluid/operators/linspace_op.h | 8 +- paddle/fluid/operators/load_combine_op.h | 3 +- paddle/fluid/operators/load_op.h | 3 +- .../fluid/operators/lod_tensor_to_array_op.cc | 3 +- paddle/fluid/operators/lookup_table_op.h | 3 +- paddle/fluid/operators/lookup_table_v2_op.cu | 6 +- paddle/fluid/operators/lookup_table_v2_op.h | 9 +- .../fluid/operators/lookup_table_v2_op_npu.cc | 3 +- paddle/fluid/operators/lstsq_op.h | 2 +- paddle/fluid/operators/lu_op.h | 7 +- .../operators/margin_cross_entropy_op.cu | 24 ++- .../fluid/operators/math/beam_search_npu.cc | 112 ++++++----- paddle/fluid/operators/math/cross_entropy.cc | 4 +- paddle/fluid/operators/math/cross_entropy.cu | 4 +- .../operators/math/eigen_values_vectors.h | 6 +- .../fluid/operators/math/matrix_solve.cu.cc | 2 +- paddle/fluid/operators/matmul_op.cc | 5 +- paddle/fluid/operators/matmul_op_npu.cc | 22 +-- paddle/fluid/operators/matmul_v2_op.cc | 10 +- paddle/fluid/operators/matrix_power_op.h | 2 +- paddle/fluid/operators/mean_op_mlu.cc | 27 +-- paddle/fluid/operators/mean_op_npu.cc | 6 +- paddle/fluid/operators/memcpy_h2d_op.h | 2 +- paddle/fluid/operators/meshgrid_op.cc | 2 +- paddle/fluid/operators/meshgrid_op_npu.cc | 2 +- .../operators/metrics/accuracy_op_npu.cc | 28 +-- .../operators/mkldnn/concat_mkldnn_op.cc | 12 +- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 24 ++- .../operators/mkldnn/dequantize_mkldnn_op.cc | 4 +- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 10 +- .../operators/mkldnn/matmul_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 7 +- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 6 +- .../operators/mkldnn/requantize_mkldnn_op.cc | 6 +- .../operators/mkldnn/reshape_mkldnn_op.cc | 60 +++--- .../fluid/operators/mkldnn/slice_mkldnn_op.cc | 17 +- .../fluid/operators/mkldnn/split_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/stack_mkldnn_op.cc | 3 +- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 4 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 5 +- paddle/fluid/operators/mul_op_npu.cc | 12 +- paddle/fluid/operators/one_hot_op_npu.cc | 3 +- paddle/fluid/operators/one_hot_v2_op_npu.cc | 3 +- .../fluid/operators/optimizers/adam_op_npu.cc | 12 +- .../operators/optimizers/rmsprop_op_npu.cc | 6 +- paddle/fluid/operators/optimizers/sgd_op.cc | 5 +- .../operators/optimizers/sparse_momentum_op.h | 6 +- paddle/fluid/operators/partial_concat_op.cc | 2 +- paddle/fluid/operators/partial_sum_op.cc | 2 +- .../pscore/send_and_recv_op_gpu_test.cc | 14 +- paddle/fluid/operators/put_along_axis_op.cu | 4 +- paddle/fluid/operators/put_along_axis_op.h | 4 +- paddle/fluid/operators/pyramid_hash_op.cc | 3 +- paddle/fluid/operators/randint_op.cu | 3 +- .../fluid/operators/reader/buffered_reader.cc | 9 +- paddle/fluid/operators/reader/read_op.cc | 3 +- paddle/fluid/operators/recurrent_op.cc | 9 +- .../reduce_ops/mkldnn/reduce_mkldnn_op.h | 20 +- .../operators/reduce_ops/reduce_max_op_npu.cc | 9 +- .../reduce_ops/reduce_mean_op_mlu.cc | 24 ++- .../operators/reduce_ops/reduce_min_op_npu.cc | 8 +- paddle/fluid/operators/reduce_ops/reduce_op.h | 
40 ++-- .../reduce_ops/reduce_prod_op_npu.cc | 6 +- .../operators/reduce_ops/reduce_sum_op.h | 5 +- .../operators/reduce_ops/reduce_sum_op_npu.cc | 15 +- .../fluid/operators/repeat_interleave_op.cu | 6 +- paddle/fluid/operators/repeat_interleave_op.h | 6 +- .../fluid/operators/rnn_memory_helper_op.cc | 2 +- paddle/fluid/operators/roi_align_op_npu.cc | 7 +- paddle/fluid/operators/save_combine_op.h | 3 +- paddle/fluid/operators/save_op.h | 2 +- paddle/fluid/operators/scale_op_mlu.cc | 18 +- paddle/fluid/operators/scale_op_npu.cc | 6 +- paddle/fluid/operators/scatter_nd_add_op.cc | 5 +- paddle/fluid/operators/scatter_nd_add_op.cu | 4 +- paddle/fluid/operators/scatter_nd_add_op.h | 4 +- paddle/fluid/operators/scatter_op.cu | 4 +- paddle/fluid/operators/scatter_op.h | 4 +- paddle/fluid/operators/scatter_op_npu.cc | 6 +- paddle/fluid/operators/scatter_op_xpu.cc | 2 +- paddle/fluid/operators/searchsorted_op.h | 4 +- paddle/fluid/operators/segment_pool_op.h | 4 +- .../sequence_ops/sequence_mask_op_npu.cc | 7 +- paddle/fluid/operators/set_value_op.h | 17 +- paddle/fluid/operators/set_value_op_npu.cc | 3 +- paddle/fluid/operators/shard_index_op_npu.cc | 2 +- .../fluid/operators/shrink_rnn_memory_op.cc | 4 +- paddle/fluid/operators/slice_op.cc | 7 +- .../fluid/operators/smooth_l1_loss_op_npu.cc | 22 +-- .../operators/softmax_with_cross_entropy_op.h | 3 +- paddle/fluid/operators/solve_op.h | 22 +-- paddle/fluid/operators/sparse_attention_op.cu | 6 +- paddle/fluid/operators/spectral_op.cc | 11 +- paddle/fluid/operators/spectral_op.cu | 22 ++- paddle/fluid/operators/spectral_op.h | 6 +- paddle/fluid/operators/split_op_mlu.cc | 8 +- paddle/fluid/operators/strided_slice_op.cc | 4 +- paddle/fluid/operators/sum_op.cc | 16 +- paddle/fluid/operators/sum_op_mlu.cc | 8 +- .../fluid/operators/sync_batch_norm_op.cu.h | 7 +- .../fluid/operators/sync_batch_norm_op_npu.cc | 21 +- paddle/fluid/operators/take_along_axis_op.cu | 4 +- paddle/fluid/operators/take_along_axis_op.h | 4 +- paddle/fluid/operators/tdm_child_op.h | 6 +- paddle/fluid/operators/tdm_sampler_op.h | 9 +- paddle/fluid/operators/tensor_formatter.cc | 4 +- .../operators/tensorrt/tensorrt_engine_op.h | 2 +- paddle/fluid/operators/top_k_op_npu.cc | 5 +- paddle/fluid/operators/top_k_v2_op_npu.cc | 5 +- paddle/fluid/operators/transfer_layout_op.h | 3 +- paddle/fluid/operators/transpose_op.cc | 3 +- paddle/fluid/operators/tril_triu_op_npu.cc | 3 +- .../truncated_gaussian_random_op_npu.cc | 13 +- paddle/fluid/operators/uniform_random_op.h | 17 +- .../fluid/operators/uniform_random_op_npu.cc | 2 +- paddle/fluid/operators/unique_op.h | 2 +- paddle/fluid/operators/unsqueeze_op.cc | 6 +- paddle/fluid/operators/utils.h | 16 +- paddle/fluid/operators/where_index_op_npu.cc | 3 +- paddle/fluid/operators/where_op_npu.cc | 2 +- .../platform/device/gpu/cuda/cudnn_desc.h | 12 +- .../platform/device/gpu/rocm/miopen_desc.h | 9 +- .../fluid/platform/device/ipu/ipu_compiler.cc | 7 +- .../fluid/platform/device/ipu/ipu_executor.cc | 3 +- paddle/fluid/platform/device/ipu/ipu_utils.h | 3 +- .../platform/device/npu/npu_op_runner.cc | 21 +- .../fluid/platform/device/npu/npu_op_runner.h | 3 +- paddle/fluid/platform/lodtensor_printer.cc | 47 ++--- paddle/fluid/platform/mkldnn_reuse.h | 6 +- paddle/fluid/pybind/eager.cc | 4 +- paddle/fluid/pybind/eager_functions.cc | 3 +- paddle/fluid/pybind/eager_method.cc | 6 +- paddle/fluid/pybind/eager_properties.cc | 3 +- paddle/fluid/pybind/imperative.cc | 34 ++-- paddle/fluid/pybind/pybind.cc | 30 ++- paddle/fluid/pybind/reader_py.cc | 3 +- 
paddle/fluid/pybind/tensor_py.h | 25 +-- paddle/pten/api/lib/utils/tensor_utils.cc | 8 +- paddle/pten/core/CMakeLists.txt | 4 +- paddle/pten/core/compat/convert_utils.cc | 162 --------------- paddle/pten/core/compat/convert_utils.h | 10 - paddle/pten/core/dense_tensor.inl | 16 +- paddle/pten/core/dense_tensor_impl.cc | 28 ++- paddle/pten/kernels/cpu/copy_kernel.cc | 3 +- paddle/pten/kernels/funcs/math_function.cc | 10 +- paddle/pten/kernels/funcs/math_function.cu | 3 +- paddle/pten/kernels/funcs/math_function.h | 1 + .../pten/kernels/funcs/math_function_impl.h | 2 +- paddle/pten/kernels/gpu/copy_kernel.cu | 3 +- paddle/pten/kernels/xpu/copy_kernel.cc | 3 +- paddle/pten/tests/core/test_convert_utils.cc | 61 +++--- .../tests/custom_op/custom_raw_op_kernel_op.h | 4 +- 352 files changed, 2175 insertions(+), 1445 deletions(-) create mode 100644 paddle/fluid/framework/convert_utils.cc create mode 100644 paddle/fluid/framework/convert_utils.h mode change 100755 => 100644 paddle/fluid/operators/gaussian_random_op_npu.cc mode change 100755 => 100644 paddle/fluid/operators/top_k_v2_op_npu.cc diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc index e0b755c66bb17..40b0a8b55e17a 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc @@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector *output_data, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(output_data->at(i)); output->name = idx_to_fetches_[idx]; bool rst = false; diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc index 23b2f5545ffc2..d1ef8b89ff092 100644 --- a/paddle/fluid/distributed/ps/service/brpc_utils.cc +++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc @@ -13,8 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/distributed/ps/service/brpc_utils.h" + #include #include + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var, } } } - var_msg->set_data_type(static_cast(tensor->type())); + var_msg->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer if (platform::is_cpu_place(tensor->place())) { - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(tensor->data()), data_len); } else { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(temp_ptr), data_len); delete[] temp_ptr; @@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var, var_data->resize(rows->size() * sizeof(int64_t)); char* data_ptr = const_cast(var_data->data()); memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t)); - var_msg->set_data_type(static_cast(tensor->type())); + var_msg->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer if (platform::is_cpu_place(tensor->place())) { - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(tensor->data()), data_len); } else { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(temp_ptr), data_len); delete[] temp_ptr; @@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, VarMessageToVarType(msg.data_type())); + void* tensor_data = tensor->mutable_data( + place, + framework::TransToPtenDataType(VarMessageToVarType(msg.data_type()))); // 
IO Buffer if (platform::is_cpu_place(place)) { @@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, } else if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA unsigned long data_len; // NOLINT - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT - io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT - io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT + io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT + io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( place, tensor_data, platform::CPUPlace(), (void*)temp_ptr, // NOLINT - tensor->numel() * framework::SizeOfType(tensor->type()), stream); + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); delete[] temp_ptr; #endif } @@ -266,8 +279,9 @@ void DeserializeSelectedRows( vec_dim.push_back(x); } tensor->Resize(framework::make_ddim(vec_dim)); - void* tensor_data = - tensor->mutable_data(place, VarMessageToVarType(msg.data_type())); + void* tensor_data = tensor->mutable_data( + place, + framework::TransToPtenDataType(VarMessageToVarType(msg.data_type()))); // IO Buffer if (platform::is_cpu_place(place)) { unsigned long data_len; // NOLINT @@ -275,15 +289,16 @@ void DeserializeSelectedRows( io_buffer_itr.copy_and_forward(tensor_data, data_len); } else if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT - unsigned long data_len; // NOLINT - io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT + unsigned long data_len; // NOLINT + io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT io_buffer_itr.copy_and_forward(temp_ptr, data_len); auto stream = reinterpret_cast(ctx).stream(); memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr, - tensor->numel() * framework::SizeOfType(tensor->type()), + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); delete[] temp_ptr; #endif diff --git a/paddle/fluid/distributed/ps/service/heter_client.cc b/paddle/fluid/distributed/ps/service/heter_client.cc index e9e3ec1d9df47..8aebae237360e 100644 --- a/paddle/fluid/distributed/ps/service/heter_client.cc +++ b/paddle/fluid/distributed/ps/service/heter_client.cc @@ -13,6 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/service/heter_client.h" + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/string/split.h" @@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx, } else { #ifdef PADDLE_WITH_CUDA std::vector temp; - temp.resize(tensor->numel() * framework::SizeOfType(tensor->type())); + temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype())); char* temp_ptr = temp.data(); auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); float* temp_ptr_float = reinterpret_cast(temp_ptr); micro_id = static_cast(temp_ptr_float[0]); #endif diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 8bfeaf47b23c3..56fdf542bbb93 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/eager/grad_tensor_holder.h" #include "paddle/fluid/imperative/gradient_accumulator.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/pten/kernels/funcs/math_function.h" diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 95c814380e3d3..a3f0ed392646c 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -452,4 +452,10 @@ endif() cc_test(scope_guard_test SRCS scope_guard_test.cc) cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils) + +if(WITH_GPU OR WITH_ROCM) + cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) +else() + cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place) +endif() cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor) diff --git a/paddle/fluid/framework/convert_utils.cc b/paddle/fluid/framework/convert_utils.cc new file mode 100644 index 0000000000000..78dd030698de3 --- /dev/null +++ b/paddle/fluid/framework/convert_utils.cc @@ -0,0 +1,185 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" +// See Note [ Why still include the fluid headers? 
] +#include "paddle/fluid/platform/device/gpu/gpu_info.h" + +namespace paddle { +namespace framework { + +paddle::experimental::DataType TransToPtenDataType( + const paddle::framework::proto::VarType::Type& dtype) { + // Set the order of case branches according to the frequency with + // the data type is used + switch (dtype) { + case paddle::framework::proto::VarType::FP32: + return DataType::FLOAT32; + case paddle::framework::proto::VarType::FP64: + return DataType::FLOAT64; + case paddle::framework::proto::VarType::INT64: + return DataType::INT64; + case paddle::framework::proto::VarType::INT32: + return DataType::INT32; + case paddle::framework::proto::VarType::INT8: + return DataType::INT8; + case paddle::framework::proto::VarType::UINT8: + return DataType::UINT8; + case paddle::framework::proto::VarType::INT16: + return DataType::INT16; + case paddle::framework::proto::VarType::COMPLEX64: + return DataType::COMPLEX64; + case paddle::framework::proto::VarType::COMPLEX128: + return DataType::COMPLEX128; + case paddle::framework::proto::VarType::FP16: + return DataType::FLOAT16; + case paddle::framework::proto::VarType::BF16: + return DataType::BFLOAT16; + case paddle::framework::proto::VarType::BOOL: + return DataType::BOOL; + default: + return DataType::UNDEFINED; + } +} + +paddle::framework::proto::VarType::Type TransToProtoVarType( + const paddle::experimental::DataType& dtype) { + // Set the order of case branches according to the frequency with + // the data type is used + switch (dtype) { + case DataType::FLOAT32: + return paddle::framework::proto::VarType::FP32; + case DataType::FLOAT64: + return paddle::framework::proto::VarType::FP64; + case DataType::INT64: + return paddle::framework::proto::VarType::INT64; + case DataType::INT32: + return paddle::framework::proto::VarType::INT32; + case DataType::INT8: + return paddle::framework::proto::VarType::INT8; + case DataType::UINT8: + return paddle::framework::proto::VarType::UINT8; + case DataType::INT16: + return paddle::framework::proto::VarType::INT16; + case DataType::COMPLEX64: + return paddle::framework::proto::VarType::COMPLEX64; + case DataType::COMPLEX128: + return paddle::framework::proto::VarType::COMPLEX128; + case DataType::FLOAT16: + return paddle::framework::proto::VarType::FP16; + case DataType::BFLOAT16: + return paddle::framework::proto::VarType::BF16; + case DataType::BOOL: + return paddle::framework::proto::VarType::BOOL; + default: + PADDLE_THROW(paddle::platform::errors::Unimplemented( + "Unsupported data type `%s` when casting it into " + "paddle data type.", + dtype)); + } +} + +size_t DataTypeSize(DataType dtype) { + switch (dtype) { + case DataType::UNDEFINED: + return 0; + case DataType::BOOL: + return sizeof(bool); + case DataType::INT8: + return sizeof(int8_t); + case DataType::UINT8: + return sizeof(uint8_t); + case DataType::INT16: + return sizeof(int16_t); + case DataType::INT32: + return sizeof(int); + case DataType::INT64: + return sizeof(int64_t); + case DataType::BFLOAT16: + return sizeof(paddle::platform::bfloat16); + case DataType::FLOAT16: + return sizeof(paddle::platform::float16); + case DataType::FLOAT32: + return sizeof(float); + case DataType::FLOAT64: + return sizeof(double); + case DataType::COMPLEX64: + return sizeof(paddle::platform::complex); + case DataType::COMPLEX128: + return sizeof(paddle::platform::complex); + default: + return 0; + } +} + +DataType String2DataType(const std::string& str) { + if (str == "bool") { + return DataType::BOOL; + } else if (str == "float16") { + 
return DataType::FLOAT16; + } else if (str == "float32") { + return DataType::FLOAT32; + } else if (str == "float64") { + return DataType::FLOAT64; + } else if (str == "int8") { + return DataType::INT8; + } else if (str == "int16") { + return DataType::INT16; + } else if (str == "int32") { + return DataType::INT32; + } else if (str == "int64") { + return DataType::INT64; + } else if (str == "uint8") { + return DataType::UINT8; + } else if (str == "complex64") { + return DataType::COMPLEX64; + } else if (str == "complex128") { + return DataType::COMPLEX128; + } else { + return DataType::UNDEFINED; + } +} + +std::string DataType2String(DataType dtype) { + switch (dtype) { + case DataType::BOOL: + return "bool"; + case DataType::INT8: + return "int8"; + case DataType::UINT8: + return "uint8"; + case DataType::INT16: + return "int16"; + case DataType::INT32: + return "int32"; + case DataType::INT64: + return "int64"; + case DataType::FLOAT16: + return "float16"; + case DataType::FLOAT32: + return "float32"; + case DataType::FLOAT64: + return "float64"; + case DataType::COMPLEX64: + return "complex64"; + case DataType::COMPLEX128: + return "complex128"; + default: + PADDLE_THROW(paddle::platform::errors::InvalidArgument( + "Unknow pten::DataType, the int value = %d.", + static_cast(dtype))); + return ""; + } +} +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/convert_utils.h b/paddle/fluid/framework/convert_utils.h new file mode 100644 index 0000000000000..e870e57b72130 --- /dev/null +++ b/paddle/fluid/framework/convert_utils.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/common/backend.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/common/layout.h" +#include "paddle/pten/core/tensor_meta.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/place.h" + +// TODO(chenweihang): this file may need to be removed + +namespace paddle { +namespace framework { + +using DataType = paddle::experimental::DataType; +using DataLayout = paddle::experimental::DataLayout; + +DataType TransToPtenDataType( + const paddle::framework::proto::VarType::Type& dtype); + +paddle::framework::proto::VarType::Type TransToProtoVarType( + const DataType& dtype); + +size_t DataTypeSize(DataType dtype); +DataType String2DataType(const std::string& str); +std::string DataType2String(DataType dtype); + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index d3a4909071e81..63c99ec050f0a 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/attribute.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_meta_info_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -777,12 +778,14 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, std::vector vec_custom_dtype; for (size_t i = 0; i < ctx->InputSize(in_name); ++i) { auto dtype = ctx->GetInputDataType(in_name, i); - vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype)); + vec_custom_dtype.emplace_back( + paddle::framework::TransToPtenDataType(dtype)); } vec_input_dtypes.emplace_back(vec_custom_dtype); } else { auto dtype = ctx->GetInputDataType(in_name); - input_dtypes.emplace_back(pten::TransToPtenDataType(dtype)); + input_dtypes.emplace_back( + paddle::framework::TransToPtenDataType(dtype)); } } @@ -794,12 +797,14 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, auto out_name = op_outputs[i]; if (detail::IsDuplicableVar(out_name)) { for (size_t j = 0; j < output_dtypes.size(); ++j) { - auto dtype = pten::TransToProtoVarType(output_dtypes[i]); + auto dtype = + paddle::framework::TransToProtoVarType(output_dtypes[i]); ctx->SetOutputDataType(out_name, dtype, j); } } else { - ctx->SetOutputDataType(out_name, - pten::TransToProtoVarType(output_dtypes[i])); + ctx->SetOutputDataType( + out_name, + paddle::framework::TransToProtoVarType(output_dtypes[i])); } } }; diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index a014d34bcf5f0..1bf6f12e63cbb 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -18,6 +18,7 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_reuse.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -79,10 +80,10 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var, } out->Resize(make_ddim(dst_dim)); - out->mutable_data(expected_kernel_type.place_, in.type()); + out->mutable_data(expected_kernel_type.place_, in.dtype()); framework::VisitDataType( - in.type(), + framework::TransToProtoVarType(in.dtype()), CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out)); out->set_layout(expected_kernel_type.data_layout_); @@ -153,11 +154,13 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, auto in_tz = paddle::framework::vectorize(in.dims()); auto out_tz = in_tz; - memory::data_type in_type = ToMKLDNNDataType(in.type()); - PADDLE_ENFORCE_NE(in_type, memory::data_type::undef, - platform::errors::InvalidArgument( - "Input tensor type (%s) is not supported.", - DataTypeToString(in.type()))); + memory::data_type in_type = + ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype())); + PADDLE_ENFORCE_NE( + in_type, memory::data_type::undef, + platform::errors::InvalidArgument( + "Input tensor type (%s) is not supported.", + DataTypeToString(framework::TransToProtoVarType(in.dtype())))); auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); auto out_format = @@ -169,8 +172,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, if ((in_format != out_format) || always_copy) { void* in_data = GetDataFromTensor(in, in_type); - platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, - cpu_engine); + platform::ReorderMKLDNNHandler handler( + in_tz, framework::TransToProtoVarType(in.dtype()), 
in_type, cpu_engine); auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data); auto reorder_dst_memory_p = diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 8e762b913f2cf..7152004b63de6 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -16,6 +16,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor.h" TEST(DataType, float16) { @@ -27,10 +28,11 @@ TEST(DataType, float16) { Tensor tensor; CPUPlace cpu; - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, f::TransToPtenDataType(dtype)); // test fp16 tensor - EXPECT_EQ(tensor.type(), f::ToDataType(typeid(float16))); + EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()), + f::ToDataType(typeid(float16))); // test fp16 size EXPECT_EQ(f::SizeOfType(dtype), 2u); @@ -49,10 +51,11 @@ TEST(DataType, bfloat16) { Tensor tensor; CPUPlace cpu; - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, f::TransToPtenDataType(dtype)); // test bf16 tensor - EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16))); + EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()), + f::ToDataType(typeid(bfloat16))); // test bf16 size EXPECT_EQ(f::SizeOfType(dtype), 2u); diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 5b6aedd2fe14b..14b5662b24aeb 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/platform/transform.h" @@ -65,12 +66,14 @@ struct CastDataType { void TransDataType(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, const Tensor& in, Tensor* out) { - PADDLE_ENFORCE_EQ(in.type(), kernel_type_for_var.data_type_, - platform::errors::InvalidArgument( - "The src dtype(%s) of input tensor and kernel_type(%s) " - "are not conststent.", - DataTypeToString(in.type()), - DataTypeToString(kernel_type_for_var.data_type_))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(in.dtype()), + kernel_type_for_var.data_type_, + platform::errors::InvalidArgument( + "The src dtype(%s) of input tensor and kernel_type(%s) " + "are not conststent.", + DataTypeToString(framework::TransToProtoVarType(in.dtype())), + DataTypeToString(kernel_type_for_var.data_type_))); auto dst_type = expected_kernel_type.data_type_; TransDataType(in, dst_type, out); } @@ -81,7 +84,7 @@ void TransDataType(const Tensor& in, platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); out->Resize(in.dims()); - auto src_type = in.type(); + auto src_type = framework::TransToProtoVarType(in.dtype()); auto dst_type = type; auto ctx = pool.Get(in.place()); diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 633963d1793d3..1facbe850ee52 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/all_reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/platform/place.h" @@ -127,7 +128,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::PreconditionNotMet( "The numel of tensor %s should be > 0, but got numel is %d.", in_var_handles[i]->name(), numel)); - dtype = lod_tensor.type(); + dtype = framework::TransToProtoVarType(lod_tensor.dtype()); is_gpu_place = platform::is_gpu_place(lod_tensor.place()); #if defined(PADDLE_WITH_XPU_BKCL) is_xpu_place = platform::is_xpu_place(lod_tensor.place()); @@ -139,7 +140,7 @@ void AllReduceOpHandle::AllReduceImpl( "The size of tensors of the same variable in different local " "scopes should be equal.")); PADDLE_ENFORCE_EQ( - dtype, lod_tensor.type(), + dtype, framework::TransToProtoVarType(lod_tensor.dtype()), platform::errors::PreconditionNotMet( "The dtype of tensors of the same variable in different local " "scopes should be equal.")); @@ -227,14 +228,15 @@ void AllReduceOpHandle::AllReduceFunc( // Reduce All Tensor to trg in CPU ReduceBufferData func(lod_tensor_data, trg.data(), numel); - VisitDataType(trg.type(), func); + VisitDataType(framework::TransToProtoVarType(trg.dtype()), func); for (size_t i = 1; i < local_exec_scopes_.size(); ++i) { auto &scope = local_exec_scopes_[i]; auto &p = places[i]; auto *var = scope->FindVar(out_var_names[i]); - size_t size = numel * SizeOfType(trg.type()); + size_t size = + numel * SizeOfType(framework::TransToProtoVarType(trg.dtype())); RunAndRecordEvent(p, [&trg, var, p, size] { auto dst_ptr = var->GetMutable()->data(); platform::CPUPlace cpu_place; diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index e8fa500e094b3..d058949ec6a19 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/broadcast_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/place.h" @@ -87,7 +88,8 @@ void BroadcastOpHandle::BroadcastOneVar( int root_id = in_tensor.place().device; std::vector> broadcast_calls; - int type = platform::ToNCCLDataType(in_tensor.type()); + int type = platform::ToNCCLDataType( + framework::TransToProtoVarType(in_tensor.dtype())); size_t numel = static_cast(in_tensor.numel()); for (auto out_var_handle : out_var_handles) { @@ -147,7 +149,8 @@ void BroadcastOpHandle::BroadcastOneVar( int root_id = in_tensor.place().device; std::vector> broadcast_calls; - int type = platform::ToBKCLDataType(in_tensor.type()); + int type = platform::ToBKCLDataType( + framework::TransToProtoVarType(in_tensor.dtype())); size_t numel = static_cast(in_tensor.numel()); for (auto out_var_handle : out_var_handles) { @@ -239,7 +242,7 @@ void BroadcastOpHandle::InitOutputValue( } VariableVisitor::ShareDimsAndLoD(*in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data(t_out_p, - in_tensor.type()); + in_tensor.dtype()); } } diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index 69741bd3c9719..69c39acc5fe59 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc 
+++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -49,7 +50,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor, if (tensor->numel() && tensor->IsInitialized()) { // step1: check type PADDLE_ENFORCE_EQ( - type, tensor->type(), + type, framework::TransToProtoVarType(tensor->dtype()), platform::errors::InvalidArgument( "The data type of fetched Tensors or the items of fetched " "LoDTensorArray are different from each other on different " @@ -57,7 +58,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor, "(th) fetched variable. Please set the " "parameter `return_merged = False` when you " "call the `Executor.run()` method.", - DataTypeToString(type), DataTypeToString(tensor->type()), offset)); + DataTypeToString(type), tensor->dtype(), offset)); // step2: check layout PADDLE_ENFORCE_EQ( @@ -139,7 +140,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( for (auto *t : src_lodtensors) { if (t->numel() && t->IsInitialized()) { check_dim = t->dims(); - new_type = t->type(); + new_type = paddle::framework::TransToProtoVarType(t->dtype()); new_layout = t->layout(); break; } @@ -169,10 +170,10 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( dst_lodtensor->set_lod(src_lodtensors[0]->lod()); if (platform::is_gpu_place(src_lodtensors[0]->place())) { dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(), - src_lodtensors[0]->type()); + src_lodtensors[0]->dtype()); } else { dst_lodtensor->mutable_data(platform::CPUPlace(), - src_lodtensors[0]->type()); + src_lodtensors[0]->dtype()); } // slice and memcpy diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index af1b73f40be53..8f76de2393eaa 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/device_memory_aligment.h" @@ -337,7 +338,8 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( size_t size_of_dtype = 0; for (size_t i = 0; i < grad_tensor.size(); ++i) { // Get dtype - auto ele_dtype = grad_tensor.at(i).second->type(); + auto ele_dtype = + framework::TransToProtoVarType(grad_tensor.at(i).second->dtype()); if (i == 0) { *dtype = ele_dtype; size_of_dtype = framework::SizeOfType(ele_dtype); diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index 430f55793b736..506f4bc5af2da 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -115,7 +115,7 @@ void GatherOpHandle::RunImpl() { DDim out_dim = pre_in_value.GetCompleteDims(); out_dim[0] = static_cast(rows); out_value->mutable_value()->Resize(out_dim).mutable_data( - t_out_p, pre_in_value.value().type()); + t_out_p, pre_in_value.value().dtype()); Tensor *out_tensor = out_value->mutable_value(); // copy diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index f57136e1f0ed9..2c0d918287a52 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -19,6 +19,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -307,7 +308,7 @@ void tensor_check(const std::string& op_type, const platform::Place& place) { TensorCheckerVisitor vistor(op_type, var_name, tensor, place); - VisitDataType(tensor.type(), vistor); + VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor); } void CheckVarHasNanOrInf(const std::string& op_type, @@ -348,7 +349,8 @@ void CheckVarHasNanOrInf(const std::string& op_type, return; } else if (platform::is_xpu_place(tensor->place())) { #ifdef PADDLE_WITH_XPU - if (tensor->type() != proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) { return; } @@ -377,14 +379,15 @@ void CheckVarHasNanOrInf(const std::string& op_type, return; } else if (platform::is_npu_place(tensor->place())) { #ifdef PADDLE_WITH_ASCEND_CL - if (tensor->type() != proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) { return; } framework::LoDTensor cpu_tensor; cpu_tensor.Resize(tensor->dims()); float* cpu_data = static_cast( - cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type())); + cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype())); framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor); bool flag = false; @@ -475,8 +478,10 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name, return; } - if ((tensor->type() != proto::VarType::FP32) && - (tensor->type() != proto::VarType::FP16)) { + if ((framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) && + (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP16)) { return; } @@ -490,16 +495,17 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name, framework::Tensor cpu_tensor; 
cpu_tensor.Resize(tensor->dims()); - cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type()); + cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()); framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor); LOG(WARNING) << "print [" << var_name << "] tensor info:"; // use env strategy control in future, -1=print_all. int print_num = 3; - if (tensor->type() == proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) { const float* value = cpu_tensor.data(); PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false); - } else if (tensor->type() == proto::VarType::FP16) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::FP16) { const paddle::platform::float16* value = cpu_tensor.data(); PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index bf38a56dc9372..f6a97160d8271 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -19,6 +19,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -208,7 +209,7 @@ void tensor_check(const std::string& op_type, TensorCheckerVisitor vistor(op_type, var_name, tensor, place); - VisitDataType(tensor.type(), vistor); + VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor); } } // namespace details diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index 6d136055da782..66ea7a15815ff 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -130,7 +130,8 @@ struct GatherLocalSelectedRowsFunctor { DDim out_dim = pre_in->GetCompleteDims(); out_dim[0] = static_cast(rows); dst_tensor.mutable_value()->Resize(out_dim); - dst_tensor.mutable_value()->mutable_data(out_place, pre_in->value().type()); + dst_tensor.mutable_value()->mutable_data(out_place, + pre_in->value().dtype()); } void operator()() { diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index 5cf84a04958b8..4df42a7d93d19 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -150,7 +151,8 @@ void ReduceOpHandle::RunImpl() { if (!FLAGS_cpu_deterministic) { ReduceLoDTensor func(lod_tensors, out_var->GetMutable()); - VisitDataType(lod_tensors[0]->type(), func); + VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()), + func); } else { // We sum lod_tensors to reduce_sum_trg which is in local_scopes_0 // here, but it doesn't mean reduce_sum_trg must be in local_scopes_0. 
@@ -158,7 +160,8 @@ void ReduceOpHandle::RunImpl() { ->FindVar(out_var_handle->name()) ->GetMutable(); ReduceLoDTensor func(lod_tensors, &reduce_sum_trg); - VisitDataType(lod_tensors[0]->type(), func); + VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()), + func); auto trg = out_var->GetMutable(); if (reduce_sum_trg.data() != trg->data()) { @@ -171,7 +174,7 @@ void ReduceOpHandle::RunImpl() { auto pre_in = pre_in_var->Get(); VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data( - out_var_handle->place(), pre_in.type()); + out_var_handle->place(), pre_in.dtype()); auto out_p = out_var_handle->place(); int root_id = out_p.device; @@ -191,7 +194,8 @@ void ReduceOpHandle::RunImpl() { out_var_handle->place()); } - int type = platform::ToNCCLDataType(lod_tensor.type()); + int type = platform::ToNCCLDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back( [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] { @@ -217,7 +221,7 @@ void ReduceOpHandle::RunImpl() { auto pre_in = pre_in_var->Get(); VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data( - out_var_handle->place(), pre_in.type()); + out_var_handle->place(), pre_in.dtype()); auto out_p = out_var_handle->place(); int root_id = out_p.device; @@ -237,7 +241,8 @@ void ReduceOpHandle::RunImpl() { out_var_handle->place()); } - int type = platform::ToBKCLDataType(lod_tensor.type()); + int type = platform::ToBKCLDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id, &bkcl_ctx] { diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc index 1ab944720f8f4..f0de723c20b74 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc @@ -17,6 +17,7 @@ #include #include "dgc/dgc.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -151,7 +152,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto &out = *outs[i]; float *out_tensor_buf = out.data(); - dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype; + dtype = (dtype == -1) ? platform::ToNCCLDataType( + framework::TransToProtoVarType(in.dtype())) + : dtype; in_numel = (in_numel == 0) ? static_cast(in.numel()) : in_numel; PADDLE_ENFORCE_EQ(in_numel % 2, 0, platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index 9979d2ee20531..cc47fe50dc8bb 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/variable_visitor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h" namespace pten { @@ -115,7 +116,7 @@ struct EnforceShapeAndDTypeEQVisitor { "The place type of the two variables is not equal. 
The src place " "is %s, but the dst place is %s", src.place().DebugString(), tensor.place().DebugString())); - PADDLE_ENFORCE_EQ(src.type(), tensor.type(), + PADDLE_ENFORCE_EQ(src.dtype(), tensor.dtype(), platform::errors::PreconditionNotMet( "The dtype of the two variables is not equal.")); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc index e191979c50522..8de74b2c477a1 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/framework/device_worker.h" +#include "paddle/fluid/framework/convert_utils.h" + namespace pten { class DenseTensor; } // namespace pten @@ -58,11 +60,13 @@ std::string PrintLodTensorIntType(Tensor* tensor, int64_t start, int64_t end) { std::string PrintLodTensor(Tensor* tensor, int64_t start, int64_t end) { std::string out_val; - if (tensor->type() == proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) { out_val = PrintLodTensorType(tensor, start, end); - } else if (tensor->type() == proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::INT64) { out_val = PrintLodTensorIntType(tensor, start, end); - } else if (tensor->type() == proto::VarType::FP64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::FP64) { out_val = PrintLodTensorType(tensor, start, end); } else { out_val = "unsupported type"; diff --git a/paddle/fluid/framework/dist_multi_trainer.cc b/paddle/fluid/framework/dist_multi_trainer.cc index 4c8681aad2bb1..c0a9475f6e6d6 100644 --- a/paddle/fluid/framework/dist_multi_trainer.cc +++ b/paddle/fluid/framework/dist_multi_trainer.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/trainer.h" @@ -153,12 +154,13 @@ void DistMultiTrainer::Finalize() { } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 2ceeed694af47..24f1591ff33c9 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
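// Illustrative sketch, not part of the patch: the _ForEachDataType_-style
// dispatch macros above (MergeCallback, PrintLoDTensorCallback, ...) are still
// instantiated once per proto::VarType value, so the comparison inside each
// macro body converts the pten dtype first. APPLY_IF_DTYPE and Apply<T>() are
// hypothetical names; only the conversion call mirrors the patched code.
#define APPLY_IF_DTYPE(cpp_type, proto_type)                              \
  do {                                                                    \
    if (paddle::framework::TransToProtoVarType(tensor.dtype()) ==         \
        proto_type) {                                                     \
      Apply<cpp_type>(tensor);                                            \
    }                                                                     \
  } while (0)
// _ForEachDataType_(APPLY_IF_DTYPE);  // expands the check for every dtype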
#include "paddle/fluid/framework/dlpack_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" namespace paddle { @@ -134,7 +135,8 @@ DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) { t_.device = paddle::platform::VisitPlace(place, internal::DLDeviceVisitor()); // init dtype - t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type()); + t_.dtype = internal::GetDLDataTypeFromTypeIndex( + framework::TransToProtoVarType(tensor.dtype())); t_.dtype.lanes = lanes; // init ndim, tensor rank diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 668a28f6008ef..e45c630002dd6 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "google/protobuf/text_format.h" #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/lod_rank_table.h" @@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) { static void print_fetch_var(Scope* scope, const std::string& var_name) { auto& tensor = scope->FindVar(var_name)->Get(); -#define PrintLoDTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor.type() == proto_type) { \ - print_lod_tensor(var_name, tensor); \ - return; \ - } \ +#define PrintLoDTensorCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \ + print_lod_tensor(var_name, tensor); \ + return; \ + } \ } while (0) _ForEachDataType_(PrintLoDTensorCallback); - VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); + VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.dtype(); } void ExecutorThreadWorker::TrainFilesWithTimer() { diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.h b/paddle/fluid/framework/fleet/ascend_wrapper.h index 4127adf1bfe27..d55862120116d 100644 --- a/paddle/fluid/framework/fleet/ascend_wrapper.h +++ b/paddle/fluid/framework/fleet/ascend_wrapper.h @@ -22,6 +22,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -146,13 +147,16 @@ class AscendInstance { // } ge::Shape shape(vec_dim); - ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND, - VarTypeToGeType(tensor->type())); + ge::TensorDesc tensor_desc( + shape, ge::Format::FORMAT_ND, + VarTypeToGeType(framework::TransToProtoVarType(tensor->dtype()))); tensor_desc.SetRealDimCnt(vec_dim.size()); const uint8_t *data = reinterpret_cast(tensor->data()); - std::vector dst(numel * GeTypeSize(tensor->type())); - memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel); + std::vector dst( + numel * GeTypeSize(framework::TransToProtoVarType(tensor->dtype()))); + memcpy(dst.data(), data, + GeTypeSize(framework::TransToProtoVarType(tensor->dtype())) * numel); ge::Tensor ge_tensor(tensor_desc, dst); return ge_tensor; } diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index 88c9a363d2789..11df5082952e5 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -28,6 +28,7 @@ limitations under the License. */ #include "paddle/fluid/framework/fleet/heter_wrapper.h" #if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS) +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker.h" namespace paddle { @@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, LoDTensor* tensor = var->GetMutable(); req_var->set_varname(varname); req_var->set_type(LOD_TENSOR); - req_var->set_data_type(static_cast(tensor->type())); + req_var->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { req_var->add_dims(dim); @@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, auto* req_data = req_var->mutable_data(); req_data->clear(); - req_data->resize(tensor->numel() * SizeOfType(tensor->type())); + req_data->resize(tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); char* data_ptr = const_cast(req_data->data()); if (platform::is_cpu_place(tensor->place())) { memcpy(data_ptr, tensor->data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); } else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), - tensor->data(), tensor->numel() * SizeOfType(tensor->type()), - nullptr); + memory::Copy( + platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(), + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype())), + nullptr); #endif #ifdef PADDLE_WITH_XPU - memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), - tensor->data(), tensor->numel() * SizeOfType(tensor->type())); + memory::Copy( + platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(), + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } } @@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, ToVarType(req_var.data_type())); + void* tensor_data = tensor->mutable_data( + place, 
framework::TransToPtenDataType(ToVarType(req_var.data_type()))); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type()), stream); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype())), + stream); #else memcpy(tensor_data, req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } #endif @@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, ToVarType(req_var.data_type())); + void* tensor_data = tensor->mutable_data( + place, framework::TransToPtenDataType(ToVarType(req_var.data_type()))); #ifdef PADDLE_WITH_XPU memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #else memcpy(tensor_data, req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } diff --git a/paddle/fluid/framework/heter_section_worker.cc b/paddle/fluid/framework/heter_section_worker.cc index 8e94bb1d0e149..6f74d088764d4 100644 --- a/paddle/fluid/framework/heter_section_worker.cc +++ b/paddle/fluid/framework/heter_section_worker.cc @@ -11,7 +11,9 @@ limitations under the License. */ #if defined(PADDLE_WITH_PSCORE) #include + #include "paddle/fluid/distributed/ps/service/heter_server.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/platform/cpu_helper.h" @@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope, auto* tensor = var->GetMutable(); std::vector dims{1}; tensor->Resize(framework::make_ddim(dims)); - void* tensor_data = - tensor->mutable_data(place, framework::proto::VarType::FP32); + void* tensor_data = tensor->mutable_data( + place, framework::TransToPtenDataType(framework::proto::VarType::FP32)); if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA std::vector temp; - temp.resize(tensor->numel() * framework::SizeOfType(tensor->type())); + temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype())); char* temp_ptr = temp.data(); float* temp_ptr_float = reinterpret_cast(temp_ptr); temp_ptr_float[0] = micro_id; auto stream = reinterpret_cast(*dev_ctx).stream(); - memory::Copy(place, tensor_data, platform::CPUPlace(), - reinterpret_cast(temp_ptr), - tensor->numel() * framework::SizeOfType(tensor->type()), - stream); + memory::Copy( + place, tensor_data, platform::CPUPlace(), + reinterpret_cast(temp_ptr), + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); #endif } else { float* temp_ptr = reinterpret_cast(tensor_data); diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index e47681820e282..a4af56419a766 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include #include "io/fs.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/device_worker_factory.h" @@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { InitializeVariable(ptr, proto::VarType::LOD_TENSOR); LoDTensor* thread_tensor = ptr->GetMutable(); -#define HeterMemcpyFunc(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - HeterMemCpy(thread_tensor, root_tensor, place, stream); \ - } \ +#define HeterMemcpyFunc(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + HeterMemCpy(thread_tensor, root_tensor, place, stream); \ + } \ } while (0) -#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - HeterMemCpy(thread_tensor, root_tensor, place); \ - } \ +#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + HeterMemCpy(thread_tensor, root_tensor, place); \ + } \ } while (0) #ifdef PADDLE_WITH_CUDA _ForEachDataType_(HeterMemcpyFunc); @@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, // } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ @@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #ifdef PADDLE_WITH_CUDA auto dev_id = thread_tensor->place().device; platform::CUDADeviceGuard guard(dev_id); - cudaMemset(thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + cudaMemset( + thread_tensor->data(), 0, + thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + thread_tensor->dtype()))); #endif #ifdef PADDLE_WITH_XPU auto place = thread_tensor->place(); @@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, platform::DeviceContext* dev_ctx = pool.Get(place); const platform::XPUDeviceContext* xpu_ctx = reinterpret_cast(dev_ctx); - xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + xpu::memset( + xpu_ctx->x_context(), thread_tensor->data(), 0, + thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + thread_tensor->dtype()))); #endif } else { memset(thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + thread_tensor->numel() * + SizeOfType( + framework::TransToProtoVarType(thread_tensor->dtype()))); } } auto* merge_var = response->add_vars(); @@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #ifdef PADDLE_WITH_CUDA auto dev_id = root_tensor->place().device; platform::CUDADeviceGuard guard(dev_id); - cudaMemset(root_tensor->data(), 0, - root_tensor->numel() * 
SizeOfType(root_tensor->type())); + cudaMemset( + root_tensor->data(), 0, + root_tensor->numel() * + SizeOfType(framework::TransToProtoVarType(root_tensor->dtype()))); #endif #ifdef PADDLE_WITH_XPU auto place = root_tensor->place(); @@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, platform::DeviceContext* dev_ctx = pool.Get(place); const platform::XPUDeviceContext* xpu_ctx = reinterpret_cast(dev_ctx); - xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0, - root_tensor->numel() * SizeOfType(root_tensor->type())); + xpu::memset( + xpu_ctx->x_context(), root_tensor->data(), 0, + root_tensor->numel() * + SizeOfType(framework::TransToProtoVarType(root_tensor->dtype()))); #endif } else { memset(root_tensor->data(), 0, - root_tensor->numel() * SizeOfType(root_tensor->type())); + root_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + root_tensor->dtype()))); } } return 0; diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc index 0b4c8f4a719af..cb33e87f490c2 100644 --- a/paddle/fluid/framework/hogwild_worker.cc +++ b/paddle/fluid/framework/hogwild_worker.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" @@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) { LoDTensor *thread_tensor = ptr1->GetMutable(); LoDTensor *root_tensor = root_scope_->FindVar(var->Name())->GetMutable(); -#define MemsetCallback(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - SetZero(thread_tensor, root_tensor, tensor_dim); \ - } \ +#define MemsetCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + SetZero(thread_tensor, root_tensor, tensor_dim); \ + } \ } while (0) _ForEachDataType_(MemsetCallback); } diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index efe2423b0e8a5..9e1958973d2d9 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/platform/enforce.h" @@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor { } } else { auto* var = BOOST_GET_CONST(VarDesc*, var_); - return pten::TransToPtenDataType(var->GetDataType()); + return paddle::framework::TransToPtenDataType(var->GetDataType()); } } @@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor { } } else { auto* var = BOOST_GET(VarDesc*, var_); - var->SetDataType(pten::TransToProtoVarType(dtype)); + var->SetDataType(paddle::framework::TransToProtoVarType(dtype)); } } diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index 30a1de15cb052..695c6bcdd169d 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_version_registry.h" namespace pten { @@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const { patterns::PDNodeName(name_scope_, "eltwise_y_in")); // Set shape && datatype manually eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims())); - eltwise_y_in_desc.SetDataType(ac_bias_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(ac_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 194686825ff2d..927a1bfd5c972 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const { VarDesc eltwise_y_in_desc( patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in")); eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims())); - eltwise_y_in_desc.SetDataType(bn_bias_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(bn_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); @@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const { name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count))); eltwise_y_in_desc.SetShape( framework::vectorize(eltwise_y_in_tensor->dims())); - eltwise_y_in_desc.SetDataType(eltwise_y_in_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(eltwise_y_in_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc index 9babfd1c982df..c9cefea836054 100644 --- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc @@ -14,6 +14,7 
@@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { "but has %s elements.", eps_tensor->numel())); CHECK_TRUE( - eps_tensor->type() == proto::VarType::FP32, + framework::TransToProtoVarType(eps_tensor->dtype()) == + proto::VarType::FP32, ::paddle::string::Sprintf("The LayerNorm divisor epsilon value " "must be of FP32 data type, but is %s.", - eps_tensor->type())); + eps_tensor->dtype())); CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"), "Validation of input mean node failed."); @@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable(); VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale")); new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]}); - new_gamma_desc.SetDataType(gamma_tensor->type()); + new_gamma_desc.SetDataType( + framework::TransToProtoVarType(gamma_tensor->dtype())); new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel()); new_gamma_desc.SetPersistable(true); auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc); @@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable(); VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias")); new_beta_desc.SetShape({layer_norm_x_mat_dims[1]}); - new_beta_desc.SetDataType(beta_tensor->type()); + new_beta_desc.SetDataType( + framework::TransToProtoVarType(beta_tensor->dtype())); new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel()); new_beta_desc.SetPersistable(true); auto* new_beta_node = g->CreateVarNode(&new_beta_desc); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index e41c35ba33fdc..dafcc9c4e16a3 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void MainTest(bool convWithExistingBias) { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 7bcb127450a35..3a78c229bd8fa 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void PreparePass(std::unique_ptr* graph, const ProgramDesc& prog, diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc index f89893c050b8c..4077700b28f2e 100644 --- 
a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc @@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void PrepareGraph(std::unique_ptr* graph, const ProgramDesc& prog) { diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index ab2e30a15ea15..a7f186ff0030f 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/version.h" namespace paddle { @@ -327,7 +328,7 @@ std::vector SplitLoDTensor( for (size_t i = 0; i < places.size(); ++i) { LoDTensor dst; dst.Resize(src.dims()); - dst.mutable_data(places[i], src.type()); + dst.mutable_data(places[i], src.dtype()); if (!src.lod().empty()) { dst.set_lod(src.lod()); } @@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target, for (auto *t : lod_tensors) { if (t->numel() && t->IsInitialized()) { new_dim = t->dims(); - new_type = t->type(); + new_type = framework::TransToProtoVarType(t->dtype()); new_layout = t->layout(); break; } @@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target, auto *t = lod_tensors[i]; if (t->numel() && t->IsInitialized()) { PADDLE_ENFORCE_EQ( - new_type, t->type(), + new_type, framework::TransToProtoVarType(t->dtype()), platform::errors::InvalidArgument( "LoDTensor data type does not match, expected type is %s, actual " "type is %s.", - DataTypeToString(new_type), DataTypeToString(t->type()))); + DataTypeToString(new_type), + DataTypeToString(framework::TransToProtoVarType(t->dtype())))); PADDLE_ENFORCE_EQ( new_layout, t->layout(), platform::errors::InvalidArgument( @@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target, target->Resize(new_dim); target->set_layout(new_layout); target->set_lod(new_lod); - target->mutable_data(dst_place, new_type); + target->mutable_data(dst_place, + paddle::framework::TransToPtenDataType(new_type)); int begin = 0; for (auto *src : lod_tensors) { diff --git a/paddle/fluid/framework/multi_trainer.cc b/paddle/fluid/framework/multi_trainer.cc index 32d3cdef4512b..0eb2ef4837bbe 100644 --- a/paddle/fluid/framework/multi_trainer.cc +++ b/paddle/fluid/framework/multi_trainer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/trainer.h" #include "paddle/fluid/platform/lodtensor_printer.h" @@ -250,12 +251,13 @@ void MultiTrainer::Finalize() { LoDTensor* thread_tensor = thread_var->GetMutable(); #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc index 3fe9e877658da..328bfdbf310ea 100644 --- a/paddle/fluid/framework/new_executor/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/data_transfer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/data_transfer.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, continue; } // only focus on complex dtype now - auto src_type = grad_tensor->type(); + auto src_type = framework::TransToProtoVarType(grad_tensor->dtype()); if (!framework::IsComplexType(src_type)) { VLOG(3) << "skip grad_tensor with not complexType"; continue; @@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, platform::errors::Unavailable( "Forward tensor is nullptr when handle complex data to real.")); // only need record type, the allocation may have been released - auto dst_type = tensor->saved_type(); + auto dst_type = framework::TransToProtoVarType(tensor->dtype()); // only focus on real dtype and need casting if (framework::IsComplexType(dst_type)) { continue; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 6af5a70d66085..4670f043102d9 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" @@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) { if (UNLIKELY(!tensor.IsInitialized())) { return ""; } - return DataTypeToString(tensor.type()); + return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); } else if (var->IsType()) { auto tensor = var->Get().value(); if (UNLIKELY(!tensor.IsInitialized())) { return "uninited"; } else { - return DataTypeToString(tensor.type()); + return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); } } else if (var->IsType()) { return "strings"; @@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type, if (tensor.memory_size() == 0) { return; } - if (tensor.type() != proto::VarType::FP32 && - tensor.type() != proto::VarType::FP64) { + if (framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP32 && + framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) { return; } PADDLE_ENFORCE_NE( @@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( continue; } // only focus on complex dtype now - auto src_type = grad_tensor->type(); + auto src_type = framework::TransToProtoVarType(grad_tensor->dtype()); if (!IsComplexType(src_type)) { continue; } @@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( platform::errors::Unavailable( "Forward tensor is nullptr when handle complex data to real.")); // only need record type, the allocation may have been released - auto dst_type = tensor->saved_type(); + auto dst_type = framework::TransToProtoVarType(tensor->dtype()); // only focus on real dtype and need casting if (IsComplexType(dst_type)) { continue; @@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType( platform::errors::InvalidArgument("The %s Op's Input Variable `%s` " "contains uninitialized Tensor.", Type(), name)); - proto::VarType::Type tmp = t->type(); + proto::VarType::Type tmp = + paddle::framework::TransToProtoVarType(t->dtype()); PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type, platform::errors::InvalidArgument( "The DataType of %s Op's duplicable or different " @@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes( auto* tensor_b = GetTensorFormInputSafely(ctx, name2); // 2. Get two input types - auto type_a = tensor_a->type(); - auto type_b = tensor_b->type(); + auto type_a = framework::TransToProtoVarType(tensor_a->dtype()); + auto type_b = framework::TransToProtoVarType(tensor_b->dtype()); // 3. 
Get first input type or promote complex types auto target_type = PromoteTypesIfComplexExists(type_a, type_b); @@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext( pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(pten::DataType))) { - auto data_type = pten::TransToPtenDataType( + auto data_type = paddle::framework::TransToPtenDataType( static_cast( BOOST_GET_CONST(int, attr))); pt_kernel_context->EmplaceBackAttr(data_type); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index db529bd17f4ab..1babd82f3c26a 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -40,6 +40,7 @@ limitations under the License. */ #include "paddle/fluid/platform/variant.h" #include "paddle/utils/flat_hash_map.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/core/compat/arg_map_context.h" #include "paddle/pten/core/compat/op_utils.h" #include "paddle/pten/core/kernel_context.h" @@ -422,8 +423,8 @@ class ExecutionContext { "size(%d).", allocation_ptr->size(), framework::product(dim) * sizeof(T))); - paddle::framework::Tensor temp_tensor( - framework::ToDataType(std::type_index(typeid(T)))); + paddle::framework::Tensor temp_tensor(framework::TransToPtenDataType( + framework::ToDataType(std::type_index(typeid(T))))); temp_tensor.Resize(dim); temp_tensor.ResetHolder(std::move(shared_allocation)); return temp_tensor; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 43d62e8d8df3b..31bf8d9b726d8 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -27,6 +27,7 @@ limitations under the License. */ #include "cinn/frontend/op_mappers/use_op_mappers.h" #include "cinn/frontend/var_type_utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" @@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor( // op auto tensor_type = ::paddle::framework::proto::VarType::FP32; if (!skip_trans_type) { - tensor_type = tensor.type(); + tensor_type = framework::TransToProtoVarType(tensor.dtype()); } auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type); info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc index 09df9a7ad2ce4..09bca4a735461 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" namespace paddle { @@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test { LoDTensor tensor; DDim dims = {256, 1024}; tensor.Resize(dims); - tensor.mutable_data(platform::CPUPlace(), proto::VarType::FP32); + tensor.mutable_data( + platform::CPUPlace(), + framework::TransToPtenDataType(framework::proto::VarType::FP32)); return tensor; }; #define FillFeedList(Name) feed_targets[#Name] = create_tensor(); diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7eebfb904cf66..aed5e2c7405ac 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" @@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices( std::vector buffers; buffers.reserve(member_->places_.size()); size_t numel = main_tensor.numel(); - ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); + ncclDataType_t data_type = platform::ToNCCLDataType( + framework::TransToProtoVarType(main_tensor.dtype())); for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; void *buffer; @@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices( auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var)->GetMutable(); t->Resize(dims); - buffer = t->mutable_data(place, main_tensor.type()); + buffer = t->mutable_data(place, main_tensor.dtype()); } buffers.push_back(buffer); } @@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices( // but broadcast is equivalent to no type of operation, does not affect // correctness. 
BKCLDataType data_type = BKCL_FLOAT; - // BKCLDataType data_type = platform::ToBKCLDataType(main_tensor.type()); + // BKCLDataType data_type = + // platform::ToBKCLDataType(framework::TransToProtoVarType(main_tensor.dtype())); for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; void *buffer; @@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices( auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var)->GetMutable(); t->Resize(dims); - buffer = t->mutable_data(place, main_tensor.type()); + buffer = t->mutable_data(place, main_tensor.dtype()); } buffers.push_back(buffer); } @@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices( for (size_t i = 0; i < member_->places_.size(); ++i) { auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]); auto broadcast_numel = numel; - if (main_tensor.type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(main_tensor.dtype()) == + framework::proto::VarType::INT64) { broadcast_numel *= 2; } PADDLE_ENFORCE_EQ( @@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices( auto copy_memory = [&] { t->Resize(dims); - t->mutable_data(cpu, main_tensor.type()); + t->mutable_data(cpu, main_tensor.dtype()); paddle::framework::TensorCopy(main_tensor, cpu, t); }; diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index 8f0efdf42f1ee..4d34ba85517e1 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "io/fs.h" #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_set.h" @@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() { } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/pten_utils.cc b/paddle/fluid/framework/pten_utils.cc index ccf7752f2c2fe..ea2e62d89f63d 100644 --- a/paddle/fluid/framework/pten_utils.cc +++ b/paddle/fluid/framework/pten_utils.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker { OpKernelType TransPtenKernelKeyToOpKernelType( const pten::KernelKey& kernel_key) { proto::VarType::Type data_type = - pten::TransToProtoVarType(kernel_key.dtype()); + paddle::framework::TransToProtoVarType(kernel_key.dtype()); // no need to set current device id here platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false); DataLayout data_layout = kernel_key.layout(); @@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey( } paddle::experimental::DataLayout layout = kernel_type.data_layout_; paddle::experimental::DataType dtype = - pten::TransToPtenDataType(kernel_type.data_type_); + paddle::framework::TransToPtenDataType(kernel_type.data_type_); return pten::KernelKey(backend, layout, dtype); } diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc index 0f1a8e2a9ed5f..86574c5ca2be3 100644 --- a/paddle/fluid/framework/save_load_util.cc +++ b/paddle/fluid/framework/save_load_util.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/layer.h" namespace paddle { @@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name, auto tensor = itera.second; proto::VarType::TensorDesc desc; - desc.set_data_type(tensor->type()); + desc.set_data_type(framework::TransToProtoVarType(tensor->dtype())); auto dims = framework::vectorize(tensor->dims()); auto* pb_dims = desc.mutable_dims(); pb_dims->Resize(static_cast(dims.size()), 0); @@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name, // save tensor uint64_t data_size = - tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::DataTypeSize(tensor->dtype()); auto* data_ptr = tensor->data(); if (platform::is_gpu_place(tensor->place())) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index b926a3cc7659b..844b5d8269500 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/complex.h" @@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, // than numel()*size(type()) auto dst_ptr = src.layout() == DataLayout::kMKLDNN - ? dst->mutable_data(dst_place, src.type(), src.memory_size()) - : dst->mutable_data(dst_place, src.type()); + ? dst->mutable_data(dst_place, src.dtype(), src.memory_size()) + : dst->mutable_data(dst_place, src.dtype()); #else - auto dst_ptr = dst->mutable_data(dst_place, src.type()); + auto dst_ptr = dst->mutable_data(dst_place, src.dtype()); #endif if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, #ifdef PADDLE_WITH_MKLDNN auto size = src.layout() == DataLayout::kMKLDNN ? 
src.memory_size() - : src.numel() * SizeOfType(src.type()); + : src.numel() * framework::DataTypeSize(src.dtype()); #else - auto size = src.numel() * SizeOfType(src.type()); + auto size = src.numel() * framework::DataTypeSize(src.dtype()); #endif if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { @@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(src.dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, src.type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor @@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, #endif auto src_place = src.place(); auto src_ptr = src.data(); - auto dst_ptr = dst->mutable_data(dst_place, src.type()); + auto dst_ptr = dst->mutable_data(dst_place, src.dtype()); VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; if (src_ptr == dst_ptr && src_place == dst_place) { @@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, return; } - auto size = src.numel() * SizeOfType(src.type()); + auto size = src.numel() * framework::DataTypeSize(src.dtype()); if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); } @@ -582,8 +583,9 @@ struct AnyDTypeVisitor { template inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, const DevCtx& ctx, framework::Tensor* out) { - VisitDataType(tensor.type(), AnyDTypeVisitor( - predicate, tensor, ctx, out)); + VisitDataType( + framework::TransToProtoVarType(tensor.dtype()), + AnyDTypeVisitor(predicate, tensor, ctx, out)); } template @@ -722,8 +724,9 @@ struct AllDTypeVisitor { template inline void AllImpl(Predicate predicate, const framework::Tensor& tensor, const DevCtx& ctx, framework::Tensor* out) { - VisitDataType(tensor.type(), AllDTypeVisitor( - predicate, tensor, ctx, out)); + VisitDataType( + framework::TransToProtoVarType(tensor.dtype()), + AllDTypeVisitor(predicate, tensor, ctx, out)); } template @@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, // int32_t size // void* protobuf message proto::VarType::TensorDesc desc; - desc.set_data_type(tensor.type()); + desc.set_data_type(framework::TransToProtoVarType(tensor.dtype())); auto dims = framework::vectorize(tensor.dims()); auto* pb_dims = desc.mutable_dims(); pb_dims->Resize(static_cast(dims.size()), 0); @@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, os.write(out.data(), size); } { // the 3rd field, tensor data - uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type()); + uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype()); auto* data_ptr = tensor.data(); PADDLE_ENFORCE_LT(size, (std::numeric_limits::max)(), @@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) { dev_ctx.Wait(); } -#define PrintTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor.type() == proto_type) { \ - os << " - dtype: " << proto_type << "\n"; \ - paddle::framework::print_tensor(os, tensor); \ - return os; \ - } \ +#define PrintTensorCallback(cpp_type, proto_type) \ + do { \ + if (paddle::framework::TransToProtoVarType(tensor.dtype()) == \ + proto_type) { \ + os << " - dtype: " << proto_type << "\n"; \ 
+ paddle::framework::print_tensor(os, tensor); \ + return os; \ + } \ } while (0) _ForEachDataType_(PrintTensorCallback); diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index f0c41e6dc0fcf..bcaf3c719cb72 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor @@ -211,7 +211,7 @@ void TensorFromVector(const std::vector& src, // so pass nullptr as stream to memory::Copy(). else if (platform::is_npu_place(dst_place)) { // NOLINT // 1. vector -> npu pinned tensor - Tensor npu_pinned_tensor(dst->type()); + Tensor npu_pinned_tensor(dst->dtype()); platform::NPUPinnedPlace npu_pinned_place; auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(dst->dims(), npu_pinned_place); @@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector& src, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index 0f105ec9a3082..99adf9b92244a 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -15,6 +15,7 @@ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/imperative/all_reduce.h" +#include "paddle/fluid/framework/convert_utils.h" #ifdef PADDLE_WITH_NCCL #include @@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, const void *src_ptr = src.data(); dst->Resize(src.dims()); - auto *dst_ptr = dst->mutable_data(src.place(), src.type()); - auto nccl_dtype = platform::ToNCCLDataType(src.type()); + auto *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + auto nccl_dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(), stream)); @@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, platform::errors::Unimplemented( "Imperative mode does not support multi-CPU training yet.")); - auto dtype = src_tensor.type(); + auto dtype = framework::TransToProtoVarType(src_tensor.dtype()); auto nccl_dtype = platform::ToNCCLDataType(dtype); auto *dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); @@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, dims[0] = rows_num; auto feature_size = framework::product(dims) / dims[0]; dst_tensor->Resize(dims); - auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); + auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype()); const auto *src_tensor_ptr = src_tensor.data(); auto sizeof_dtype = framework::SizeOfType(dtype); diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 
4c91ece049301..973541e6dcc1b 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -24,6 +24,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" @@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) { // correct. var->DataType() returns the default dtype, which is float32. // Here, we use the type of the corresponding forward datatype. - tensor->mutable_data(op.place(), var->ForwardDataType()); + tensor->mutable_data( + op.place(), framework::TransToPtenDataType(var->ForwardDataType())); VLOG(6) << "Set ungenerated Grad: " << var->Name() << " as zero with dtype " << framework::DataTypeToString(var->ForwardDataType()); diff --git a/paddle/fluid/imperative/bkcl_context.cc b/paddle/fluid/imperative/bkcl_context.cc index f08dd59e39206..11abbfe7cf6a3 100644 --- a/paddle/fluid/imperative/bkcl_context.cc +++ b/paddle/fluid/imperative/bkcl_context.cc @@ -13,13 +13,14 @@ // limitations under the License. #if defined(PADDLE_WITH_XPU_BKCL) -#include "paddle/fluid/imperative/bkcl_context.h" #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/imperative/bkcl_context.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, const void *src_ptr = src.data(); dst->Resize(src.dims()); - auto *dst_ptr = dst->mutable_data(src.place(), src.type()); - auto bkcl_dtype = platform::ToBKCLDataType(src.type()); + auto *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + auto bkcl_dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(), bkcl_dtype, BKCL_ADD, stream), @@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { XPUStream stream = comm->stream(); void *src_ptr = src_tensor->data(); - auto data_type = platform::ToBKCLDataType(src_tensor->type()); + auto data_type = platform::ToBKCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr, src_tensor->numel(), data_type, 0, stream), diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc index eeac500cac413..0c55cd9d5d08d 100644 --- a/paddle/fluid/imperative/gloo_context.cc +++ b/paddle/fluid/imperative/gloo_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
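// Illustrative sketch, not part of the patch: when only a legacy
// proto::VarType::Type is at hand (a VarBase's ForwardDataType(), a
// deserialized data_type field, or a hard-coded proto::VarType::FP32), it is
// converted the other way with TransToPtenDataType() before calling the new
// mutable_data(place, pten::DataType) overload, as basic_engine.cc does
// above. tensor and place are placeholders here.
void* data = tensor->mutable_data(
    place, paddle::framework::TransToPtenDataType(
               paddle::framework::proto::VarType::FP32));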
#include "paddle/fluid/imperative/gloo_context.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" @@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor, framework::Tensor *dst_tensor) { auto gloo_wrapper = framework::GlooWrapper::GetInstance(); dst_tensor->Resize(src_tensor.dims()); - switch (src_tensor.type()) { + switch (framework::TransToProtoVarType(src_tensor.dtype())) { GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper); GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper); GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper); @@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, VLOG(3) << "SelectedRows AllReduce start"; const auto &src_tensor = src.value(); const auto &place = src_tensor.place(); - auto dtype = src_tensor.type(); + auto dtype = framework::TransToProtoVarType(src_tensor.dtype()); // 1. Gather rows number from all workers. Here use ncclAllGather to do this, // but we can use other ways to implement is in the future const auto &src_rows = src.rows(); @@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, std::for_each(element_nums.begin(), element_nums.end(), [feature_size](size_t &x) { x = x * feature_size; }); - auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); + auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype()); gloo_wrapper->AllGatherVector(const_cast(src_rows_ptr), static_cast(dst_rows_ptr), rows_num_vector); diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 5eed7eca7a751..d57cb696526b4 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -18,6 +18,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/imperative/layer.h" @@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) { "%zu and the number of elements of destination tensor is %zu.", numel, dst_tensor->numel())); - auto data_type = src_tensor.type(); + auto data_type = framework::TransToProtoVarType(src_tensor.dtype()); auto place = src_tensor.place(); - PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type, + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(dst_tensor->dtype()), + data_type, platform::errors::PreconditionNotMet( "The data type of source tensor and destination tensor " "should be equal, Otherwise, the calculation results " @@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) { const pten::SelectedRows& src_selected_rows = GetInnerTensor(src); auto place = dst_tensor->place(); - auto data_type = src_selected_rows.value().type(); + auto data_type = + framework::TransToProtoVarType(src_selected_rows.value().dtype()); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); #define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \ @@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var, const pten::DenseTensor& src_tensor = GetInnerTensor(src_tensor_var); const auto& place = src_tensor.place(); - auto data_type = src_tensor.type(); + auto data_type = 
framework::TransToProtoVarType(src_tensor.dtype()); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); pten::DenseTensor* dst_tensor = GetInnerMutableTensor(dst_tensor_var); dst_tensor->Resize(src_tensor.dims()); - dst_tensor->mutable_data(place, data_type); + dst_tensor->mutable_data(place, src_tensor.dtype()); + #define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \ if (data_type == framework::DataTypeTrait::DataType()) { \ paddle::operators::math::SelectedRowsAddTensor \ @@ -477,7 +481,8 @@ std::shared_ptr SelectedRowsMerge( auto& src_selected_rows1 = src1.Get(); auto& src_selected_rows2 = src2.Get(); auto place = src_selected_rows1.value().place(); - auto data_type = src_selected_rows1.value().type(); + auto data_type = + framework::TransToProtoVarType(src_selected_rows1.value().dtype()); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); std::vector src_selected_rows; @@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr var, VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " << var->Var().Get().dims(); tensor->Resize(var->Var().Get().dims()); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } else { auto* tensor = dst_var->MutableVar()->GetMutable(); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } } @@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr var, VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " << var->Var().Get().dims(); tensor->Resize(var->Var().Get().dims()); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } else { auto* tensor = dst_var->MutableVar()->GetMutable(); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } } diff --git a/paddle/fluid/imperative/hccl_context.cc b/paddle/fluid/imperative/hccl_context.cc index 7292c0f82fced..31d988753f23c 100644 --- a/paddle/fluid/imperative/hccl_context.cc +++ b/paddle/fluid/imperative/hccl_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
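The gradient_accumulator.cc checks above now compare proto types recovered from dtype() on both sides instead of calling type() on each tensor. A reduced sketch of that comparison; CheckSameDtype is a hypothetical helper and the error wording is illustrative only:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/enforce.h"

    // Both tensors are mapped back to proto::VarType::Type so the existing
    // PADDLE_ENFORCE_EQ comparison and message formatting keep working.
    void CheckSameDtype(const paddle::framework::Tensor& src,
                        const paddle::framework::Tensor& dst) {
      auto src_type = paddle::framework::TransToProtoVarType(src.dtype());
      auto dst_type = paddle::framework::TransToProtoVarType(dst.dtype());
      PADDLE_ENFORCE_EQ(dst_type, src_type,
                        paddle::platform::errors::PreconditionNotMet(
                            "The data type of source tensor and destination "
                            "tensor should be equal."));
    }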
#include "paddle/fluid/imperative/hccl_context.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" @@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, void *src_ptr = const_cast(src.data()); dst->Resize(src.dims()); - void *dst_ptr = dst->mutable_data(src.place(), src.type()); - HcclDataType hccl_dtype = platform::ToHCCLDataType(src.type()); + void *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + HcclDataType hccl_dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce( src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(), @@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { void *src_ptr = reinterpret_cast(const_cast(src_tensor->data())); - auto hccl_dtype = platform::ToHCCLDataType(src_tensor->type()); + auto hccl_dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast( src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(), reinterpret_cast(stream))); diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index 65309b66db594..c3f04cba36d89 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/imperative/jit/program_desc_tracer.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace imperative { @@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist( new_var_desc->SetShape(framework::vectorize(tensor.dims())); new_var_desc->SetLoDLevel(tensor.lod().size()); if (tensor.IsInitialized()) { - new_var_desc->SetDataType(tensor.type()); + new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype())); } else { new_var_desc->SetDataType(framework::proto::VarType::FP32); } diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 60e1291a08700..b8c423f77bd23 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -15,6 +15,8 @@ #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/infer_var_type_context.h" #include "paddle/fluid/imperative/op_base.h" @@ -99,7 +101,9 @@ static std::string DebugString( auto& tensor = var.Get(); ss << "LoDTensor<"; if (tensor.IsInitialized()) { - ss << framework::DataTypeToString(tensor.type()) << ", "; + ss << framework::DataTypeToString( + framework::TransToProtoVarType(tensor.dtype())) + << ", "; ss << tensor.place() << ", "; ss << "(" << tensor.dims() << ")"; } else { @@ -112,7 +116,9 @@ static std::string DebugString( auto& tensor = selected_rows.value(); auto& rows = selected_rows.rows(); if (tensor.IsInitialized()) { - ss << framework::DataTypeToString(tensor.type()) << ", "; + ss << framework::DataTypeToString( + framework::TransToProtoVarType(tensor.dtype())) + << ", "; ss << tensor.place() << ", "; ss << "height(" << selected_rows.height() << "), rows("; std::for_each(rows.cbegin(), rows.cend(), diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index 
066d0db134817..e9d987cc7045f 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -25,6 +25,7 @@ #include "paddle/fluid/platform/dynload/nccl.h" #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" @@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { gpuStream_t stream = comm->stream(); void *src_ptr = src_tensor->data(); - auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type()); + auto nccl_dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream)); } diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc index ed60a4dc0849b..fcadd0046fe2c 100644 --- a/paddle/fluid/imperative/partial_grad_engine.cc +++ b/paddle/fluid/imperative/partial_grad_engine.cc @@ -24,6 +24,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" @@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var, // we can't get data_type_ directly. We need to check if we can only use // default data_type for now. if (ref_var.ForwardDataType() != -1) { - dst_tensor->mutable_data(place, ref_var.ForwardDataType()); + dst_tensor->mutable_data( + place, framework::TransToPtenDataType(ref_var.ForwardDataType())); } else { - dst_tensor->mutable_data(place, ref_var.DataType()); + dst_tensor->mutable_data( + place, framework::TransToPtenDataType(ref_var.DataType())); } pten::funcs::set_constant(*dev_ctx, dst_tensor, value); } @@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask( platform::errors::InvalidArgument( "The %d-th grad_output's shape does not match the %d-th output", i, i)); - PADDLE_ENFORCE_EQ(grad_tensor.type(), out_tensor.type(), + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(grad_tensor.dtype()), + framework::TransToProtoVarType(out_tensor.dtype()), platform::errors::InvalidArgument( "The %d-th grad_output's data type does not " "match the %d-th output", diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index b3b3725ba993e..d5dc53196dd7f 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -29,6 +29,7 @@ #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/var_helper.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/selected_rows.h" @@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext( kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(pten::DataType))) { - auto data_type = pten::TransToPtenDataType( + auto data_type = framework::TransToPtenDataType( static_cast( BOOST_GET_CONST(int, attr))); kernel_ctx->EmplaceBackAttr(data_type); diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index 361b9eb0fe64f..c8b5b452453fc 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -446,7 +446,7 @@ void Reducer::InitializeGroups( 
InitializeDenseGroups(variable_indices_, &group); auto tensor = group.dense_contents_.GetMutable(); tensor->Resize(framework::make_ddim({group.all_length_})) - .mutable_data(place_, group.dtype_); + .mutable_data(place_, framework::TransToPtenDataType(group.dtype_)); } // map variables to this group by VariableLocator @@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) { // by avoiding tensor construction if (!group_tensor.IsInitialized()) { group_tensor.Resize({static_cast(length)}); - group_tensor.mutable_data(place_, group.dtype_); + group_tensor.mutable_data(place_, + framework::TransToPtenDataType(group.dtype_)); } #ifdef PADDLE_WITH_XPU_BKCL diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index e91b0b0a7770e..584f8ead3d8de 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -17,6 +17,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/memory/memcpy.h" @@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1, auto* t1_p = t1.data(); auto* t2_p = t2.data(); - return std::memcmp(t1_p, t2_p, - t1.numel() * framework::SizeOfType(t1.type())) == 0; + return std::memcmp( + t1_p, t2_p, + t1.numel() * framework::SizeOfType( + framework::TransToProtoVarType(t1.dtype()))) == 0; } template diff --git a/paddle/fluid/imperative/tests/test_group.cc b/paddle/fluid/imperative/tests/test_group.cc index 0c058038968be..b4a5ff90fe609 100644 --- a/paddle/fluid/imperative/tests/test_group.cc +++ b/paddle/fluid/imperative/tests/test_group.cc @@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) { tmp.ShareDataWith(*tensor).Resize({static_cast(len)}); group.dense_tensors_.push_back(std::move(tmp)); group.all_length_ += len; - group.dtype_ = tensor->type(); + group.dtype_ = framework::TransToProtoVarType(tensor->dtype()); } paddle::platform::DeviceContextPool& pool = @@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) { { // concat auto* tensor = group.dense_contents_.GetMutable(); tensor->Resize(framework::make_ddim({group.all_length_})) - .mutable_data(place, group.dtype_); + .mutable_data(place, framework::TransToPtenDataType(group.dtype_)); group.ConcatTensors(*dev_ctx); group.DivNRanks(*dev_ctx, 1); diff --git a/paddle/fluid/imperative/var_helper.cc b/paddle/fluid/imperative/var_helper.cc index 4686aef5afdf7..3548f2eeafd24 100644 --- a/paddle/fluid/imperative/var_helper.cc +++ b/paddle/fluid/imperative/var_helper.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/imperative/var_helper.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -170,9 +171,11 @@ template <> framework::proto::VarType::Type GetDataType( std::shared_ptr var) { if (var->Var().IsType()) { - return var->Var().Get().value().type(); + return framework::TransToProtoVarType( + var->Var().Get().value().type()); } else if (var->Var().IsType()) { - return var->Var().Get().type(); + return framework::TransToProtoVarType( + var->Var().Get().type()); } else { PADDLE_THROW(paddle::platform::errors::PermissionDenied( "We only support 
pten::SelectedRows and framework::LoDTensor in " diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h index bd96cd3f1aa17..700b859b0b7fa 100644 --- a/paddle/fluid/imperative/variable_wrapper.h +++ b/paddle/fluid/imperative/variable_wrapper.h @@ -19,6 +19,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/framework/variable.h" @@ -169,7 +170,7 @@ class VariableWrapper { } } if (tensor && tensor->IsInitialized()) { - return tensor->type(); + return framework::TransToProtoVarType(tensor->dtype()); } else { VLOG(6) << "The tensor of variable " << name_ << " is not initialized"; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 694e84ad756b5..caac973d8b89a 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(outputs->at(i)); output->name = fetches_[idx]->Input("X")[0]; if (type == framework::proto::VarType::FP32) { diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index d1f49b84f0679..c6de967afb71d 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(outputs->at(i)); output->name = fetchs_[idx]->Input("X")[0]; if (type == framework::DataTypeTrait::DataType()) { diff --git a/paddle/fluid/inference/api/api_impl_tester.cc b/paddle/fluid/inference/api/api_impl_tester.cc index 124279d246093..41a022cd164f9 100644 --- a/paddle/fluid/inference/api/api_impl_tester.cc +++ b/paddle/fluid/inference/api/api_impl_tester.cc @@ -18,6 +18,7 @@ limitations under the License. 
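GetDataType in var_helper.cc above keeps its framework::proto::VarType::Type return type and converts only at the boundary, for both SelectedRows and LoDTensor variables. A minimal free-function sketch of the same dispatch; VarProtoDtype is an illustrative name and the unsupported-type error path is omitted:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/lod_tensor.h"
    #include "paddle/fluid/framework/variable.h"
    #include "paddle/pten/core/selected_rows.h"

    // Map the inner pten dtype back to the legacy proto enum, whichever
    // container the variable holds.
    paddle::framework::proto::VarType::Type VarProtoDtype(
        const paddle::framework::Variable& var) {
      if (var.IsType<pten::SelectedRows>()) {
        return paddle::framework::TransToProtoVarType(
            var.Get<pten::SelectedRows>().value().dtype());
      }
      return paddle::framework::TransToProtoVarType(
          var.Get<paddle::framework::LoDTensor>().dtype());
    }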
*/ #include // NOLINT #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -36,13 +37,16 @@ namespace paddle { PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { PaddleTensor pt; - if (t->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::INT64) { pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); pt.dtype = PaddleDType::INT64; - } else if (t->type() == framework::proto::VarType::FP32) { + } else if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::FP32) { pt.data.Reset(t->data(), t->numel() * sizeof(float)); pt.dtype = PaddleDType::FLOAT32; - } else if (t->type() == framework::proto::VarType::INT32) { + } else if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::INT32) { pt.data.Reset(t->data(), t->numel() * sizeof(int32_t)); pt.dtype = PaddleDType::INT32; } else { diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 7fe980bf40641..5dc05711807e6 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" @@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const { DataType Tensor::type() const { EAGER_GET_TENSOR(paddle::framework::LoDTensor); - auto type = tensor->type(); + auto type = paddle::framework::TransToProtoVarType(tensor->dtype()); if (type == paddle::framework::proto::VarType::FP32) { return DataType::FLOAT32; } else if (type == paddle::framework::proto::VarType::FP16) { diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index c4e195b6ec8fa..12d7f78e169cc 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, snprintf(output_i.name, output_names[i].length() + 1, "%s", output_names[i].c_str()); auto output_t = predictor->GetOutputTensor(output_names[i]); - output_i.dtype = ConvertToPDDataType(output_t->type()); + output_i.dtype = + ConvertToPDDataType(framework::TransToProtoVarType(output_t->dtype())); std::vector output_shape = output_t->shape(); output_i.shape = new int[output_shape.size()]; memmove(output_i.shape, output_shape.data(), @@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor, void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { auto output = predictor->predictor->GetOutputTensor(tensor->name); - tensor->dtype = ConvertToPDDataType(output->type()); + tensor->dtype = + ConvertToPDDataType(framework::TransToProtoVarType(output->dtype())); auto shape = output->shape(); size_t shape_size = shape.size(); if (tensor->shape.capacity < shape_size * sizeof(int)) { @@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { int n = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - size_t length = n * paddle::PaddleDtypeSize(output->type()); + size_t 
length = n * paddle::PaddleDtypeSize( + framework::TransToProtoVarType(output->dtype())); if (tensor->data.capacity < length) { if (tensor->data.data) { std::free(tensor->data.data); diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc index 27e3417933806..22d4476f37e89 100644 --- a/paddle/fluid/inference/lite/tensor_utils.cc +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -16,6 +16,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/memory/allocation/allocator.h" @@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, // the input tensor. constexpr int empty_size = 0; dst->Resize({empty_size}); - GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), - GetLiteTargetType(src.place())); - dst->SetPrecision(GetLitePrecisionType(src.type())); + GetLiteTensorDataPtr( + dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())), + GetLiteTargetType(src.place())); + dst->SetPrecision( + GetLitePrecisionType(framework::TransToProtoVarType(src.dtype()))); paddle::lite::LoD lite_lod; SetLoD(&lite_lod, src.lod()); dst->SetLoD(lite_lod); @@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, void InitDstTensor(framework::LoDTensor* dst, const paddle::lite_api::Tensor& src) { - dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), - GetNativePrecisionType(src.precision())); + dst->mutable_data( + inference::lite::utils::GetNativePlace(src.target()), + framework::TransToPtenDataType(GetNativePrecisionType(src.precision()))); SetLoD(dst->mutable_lod(), src.lod()); } @@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst, const platform::Place& src_place = src.place(); const platform::Place& dst_place = GetNativePlace(dst->target()); const size_t bytes = - static_cast(src.numel()) * framework::SizeOfType(src.type()); + static_cast(src.numel()) * framework::DataTypeSize(src.dtype()); dst->Resize(framework::vectorize(src.dims())); const void* src_data = src.data(); void* dst_data{nullptr}; - dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), - GetLiteTargetType(src.place())); + dst_data = GetLiteTensorDataPtr( + dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())), + GetLiteTargetType(src.place())); VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src - << ", dst = " << dst << ", src_type = " << src.type(); + << ", dst = " << dst + << ", src_type = " << framework::TransToProtoVarType(src.dtype()); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); VLOG(3) << "[Lite memory size] Bytes = " << bytes; } @@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst, const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& dst_place = dst->place(); int64_t src_numel = GetLiteTensorNumel(src); - const size_t bytes = src_numel * framework::SizeOfType(dst->type()); + const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype()); const void* src_data = src.data(); // When Lite is ready, the source type needs to be modified here. 
- void* dst_data = dst->mutable_data(dst_place, dst->type()); + void* dst_data = dst->mutable_data(dst_place, dst->dtype()); VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src - << ", dst = " << dst << ", src_type = " << dst->type(); + << ", dst = " << dst + << ", src_type = " << framework::TransToProtoVarType(dst->dtype()); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); VLOG(3) << "[Lite memory size] Bytes = " << bytes; } @@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) { dst->Resize(framework::vectorize(src->dims())); dst->ShareExternalMemory(src->data(), src->memory_size(), GetLiteTargetType(src->place())); - dst->SetPrecision(GetLitePrecisionType(src->type())); + dst->SetPrecision( + GetLitePrecisionType(framework::TransToProtoVarType(src->dtype()))); paddle::lite::LoD lite_lod; SetLoD(&lite_lod, src->lod()); dst->SetLoD(lite_lod); @@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) { src_raw_data, memory_size, GetNativePlace(src->target()))); dst->Resize(paddle::framework::make_ddim(src->shape())); SetLoD(dst->mutable_lod(), src->lod()); - dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision())); + dst->ResetHolderWithType( + holder, + framework::TransToPtenDataType(GetNativePrecisionType(src->precision()))); } } // namespace utils diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 01953bd721f3e..0757f5c505c61 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, } for (size_t i = 0; i < a_size; i++) { - if (a.type() == VarType::FP32) { + if (framework::TransToProtoVarType(a.dtype()) == VarType::FP32) { const auto *a_data = a.data(); const auto *b_data = b.data(); if (std::abs(a_data[i] - b_data[i]) > 1e-3) { @@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, b_data[i]); return false; } - } else if (a.type() == VarType::INT64) { + } else if (framework::TransToProtoVarType(a.dtype()) == VarType::INT64) { const auto *a_data = a.data(); const auto *b_data = b.data(); if (std::abs(a_data[i] - b_data[i]) > 1e-3) { diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc index 6583ad0a4952c..5a09933e0ee24 100644 --- a/paddle/fluid/operators/abs_op.cc +++ b/paddle/fluid/operators/abs_op.cc @@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, const framework::Tensor& tensor, const framework::OpKernelType& expected_kernel_type) const { - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } }; diff --git a/paddle/fluid/operators/activation_op_mlu.cc b/paddle/fluid/operators/activation_op_mlu.cc index caa498faddaa1..a2ac1e7d42d45 100644 --- a/paddle/fluid/operators/activation_op_mlu.cc +++ b/paddle/fluid/operators/activation_op_mlu.cc @@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc 
output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(), reinterpret_cast(input->data()), @@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel { dx->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc dout_desc(*dout, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(dout->type())); - MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); - MLUCnnlTensorDesc dx_desc(*dx, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(dx->type())); + MLUCnnlTensorDesc dout_desc( + *dout, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(dout->dtype()))); + MLUCnnlTensorDesc out_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); + MLUCnnlTensorDesc dx_desc( + *dx, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(dx->dtype()))); MLUCnnlActivationDesc act_desc(act_mode, alpha); MLUCnnl::ActiveGrad( ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr, diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 3d8a1c74cd663..344eb2348ab3c 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel { // Step 2: Construct a broadcast factor, which has the same shape with x. // 2.1 Get a factor tensor with shape [1]. - Tensor factor_tensor(framework::proto::VarType::FP32); + Tensor factor_tensor(experimental::DataType::FLOAT32); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, factor); // 2.2 Get the factor which has the shape with x and the same value with // factor. 
- Tensor factor_bc_tensor(framework::proto::VarType::FP32); + Tensor factor_bc_tensor(experimental::DataType::FLOAT32); factor_bc_tensor.mutable_data(x_dims, place); const auto& runner_bc = NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor}, @@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel { {{"dims", framework::vectorize(x->dims())}}); runner_fill.Run(stream); - Tensor tmp_bool(framework::proto::VarType::BOOL); + Tensor tmp_bool(experimental::DataType::BOOL); tmp_bool.mutable_data(x->dims(), place); const auto& runner_less = NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool}); runner_less.Run(stream); Tensor tmp4(x->type()); tmp4.mutable_data(x->dims(), place); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast = NpuOpRunner("Cast", {tmp_bool}, {tmp4}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/allclose_op.h b/paddle/fluid/operators/allclose_op.h index b5683a1d9a93c..7a36754194ace 100644 --- a/paddle/fluid/operators/allclose_op.h +++ b/paddle/fluid/operators/allclose_op.h @@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel { rtol->numel(), 1, platform::errors::InvalidArgument( "Input(Rtol) size must be 1, but get %d.", rtol->numel())); - PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Rtol) type must be double, but get %s.", - framework::DataTypeToString(rtol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(rtol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Rtol) type must be double, but get %s.", + framework::DataTypeToString( + framework::TransToProtoVarType(rtol->dtype())))); rtol_v = get_tensor_value(dev_ctx, *rtol); } if (ctx.HasInput("Atol")) { @@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel { atol->numel(), 1, platform::errors::InvalidArgument( "Input(Atol) size must be 1, but get %d", atol->numel())); - PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Atol) type must be double, but get %s", - framework::DataTypeToString(atol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(atol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Atol) type must be double, but get %s", + framework::DataTypeToString( + framework::TransToProtoVarType(atol->dtype())))); atol_v = get_tensor_value(dev_ctx, *atol); } diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc index 0a710dd842fd4..1393da7dd57a7 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc @@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx, platform::NPUMemsetAsync(static_cast(g), 0, good_out_tensor->numel() * sizeof(int), stream); // bad_out_data = bad_in_data + 1 - Tensor factor_tensor(bad_out_tensor->type()); + Tensor factor_tensor(bad_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor}, @@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx, bad_out_tensor->numel() * sizeof(int), stream); // good_out_data = good_in_data + 1 - Tensor 
factor_tensor(good_out_tensor->type()); + Tensor factor_tensor(good_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor}, @@ -188,7 +188,7 @@ class LazyZerosNPU { if (!found_inf_vec[0]) { framework::TensorCopy(*x, place, dev_ctx, out); } else if (zero_ptr != dst_ptr) { - auto size = out->numel() * framework::SizeOfType(out->type()); + auto size = out->numel() * framework::DataTypeSize(out->dtype()); memory::Copy(place, dst_ptr, place, zero_ptr, size, stream); } } diff --git a/paddle/fluid/operators/argsort_op_npu.cc b/paddle/fluid/operators/argsort_op_npu.cc index b7290232e87be..a632b4df5dd79 100644 --- a/paddle/fluid/operators/argsort_op_npu.cc +++ b/paddle/fluid/operators/argsort_op_npu.cc @@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel { framework::NPUAttributeMap attr = {{"axis", -1}, {"descending", descending}}; - Tensor indices_tmp(framework::proto::VarType::INT32); + Tensor indices_tmp(experimental::DataType::INT32); indices_tmp.Resize(indices->dims()); - if (input->type() == framework::proto::VarType::INT64) { - Tensor input_fp32(framework::proto::VarType::FP32); + if (framework::TransToProtoVarType(input->dtype()) == + framework::proto::VarType::INT64) { + Tensor input_fp32(experimental::DataType::FLOAT32); input_fp32.Resize(input->dims()); CastToFP32(ctx, stream, *input, &input_fp32); - Tensor output_fp32(framework::proto::VarType::FP32); + Tensor output_fp32(experimental::DataType::FLOAT32); output_fp32.Resize(output->dims()); if (axis == -1 || axis + 1 == in_dims.size()) { @@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel { TranposeNPU(ctx, stream, &perm, input_fp32, &trans_input); Tensor trans_output(input_fp32.type()); - Tensor trans_indices(framework::proto::VarType::INT32); + Tensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); @@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel { TranposeNPU(ctx, stream, &perm, *input, &trans_input); Tensor trans_output(input->type()); - Tensor trans_indices(framework::proto::VarType::INT32); + Tensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index a959067ddba62..f1e633283d9cc 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor { ArrayToLoDFunctorImpl functor; functor.dev_ctx_ = dev_ctx; functor.prev_functor_ = this; - framework::VisitDataType(out->type(), functor); + framework::VisitDataType(framework::TransToProtoVarType(out->dtype()), + functor); } }; @@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { "There's no element in the input array.")); int rank = x[0].dims().size(); platform::Place place = x[0].place(); - auto data_type = x[0].type(); + auto data_type = x[0].dtype(); int64_t batch_size = x[0].dims()[0]; framework::DDim ins_dims = rank > 1 ? 
framework::slice_ddim(x[0].dims(), 1, rank) @@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { "The current place is %d, and the previous place is %d.", i, x[i].place(), place)); PADDLE_ENFORCE_EQ( - x[i].type(), data_type, + x[i].dtype(), data_type, platform::errors::InvalidArgument( "The date type of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current data type is %d, and the previous data type is %d.", - i, x[i].type(), data_type)); + i, x[i].dtype(), data_type)); batch_size += x[i].dims()[0]; } auto ins_dim_vec = framework::vectorize(ins_dims); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 8e960ff89bf51..57da23ba951ad 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( bn_param_type = framework::proto::VarType::FP64; } PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Scale")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Bias")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Mean")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), platform::errors::InvalidArgument( "Variance input should be of float type")); diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index 1590eed0bb2b0..3fae65c50177b 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
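The NPU kernels above (activation_op_npu.cc, argsort_op_npu.cc) construct temporary tensors from experimental::DataType values rather than proto::VarType enums. A reduced sketch of that idiom; MakeFp32Scratch is a hypothetical helper and the {1} shape is a placeholder:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/place.h"

    // Scratch tensors are typed with the pten/experimental enum directly;
    // proto::VarType::FP32 is no longer passed to the Tensor constructor.
    void MakeFp32Scratch(const paddle::platform::Place& place) {
      paddle::framework::Tensor scratch(paddle::experimental::DataType::FLOAT32);
      scratch.mutable_data<float>({1}, place);  // placeholder shape
    }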
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/platform/device_context.h" @@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase { LoDTensor* sentenceScores = ctx.Output("SentenceScores"); framework::VisitDataType( - scores->at(0).type(), + framework::TransToProtoVarType(scores->at(0).dtype()), BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores, beam_size, end_id)); } diff --git a/paddle/fluid/operators/bincount_op.cu b/paddle/fluid/operators/bincount_op.cu index dd7804625a77c..c318737ae962d 100644 --- a/paddle/fluid/operators/bincount_op.cu +++ b/paddle/fluid/operators/bincount_op.cu @@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { PADDLE_CUDA_NUM_THREADS, 0, stream>>>( input_data, input_numel, has_weights, weights_data, output_data); } else { - const auto& weights_type = weights->type(); + const auto& weights_type = framework::TransToProtoVarType(weights->dtype()); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); @@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("X"); - const auto& input_type = input->type(); + const auto& input_type = framework::TransToProtoVarType(input->dtype()); if (input_type == framework::proto::VarType::INT32) { BincountCUDAInner(context); diff --git a/paddle/fluid/operators/bincount_op.h b/paddle/fluid/operators/bincount_op.h index 3f4334099e277..60d4dffe3bf98 100644 --- a/paddle/fluid/operators/bincount_op.h +++ b/paddle/fluid/operators/bincount_op.h @@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) { if (has_weights) { const T* weights_data = weights->data(); - const auto& weights_type = weights->type(); + const auto& weights_type = framework::TransToProtoVarType(weights->dtype()); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); pten::funcs::SetConstant()( @@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("X"); - const auto& input_type = input->type(); + const auto& input_type = framework::TransToProtoVarType(input->dtype()); if (input_type == framework::proto::VarType::INT32) { BincountInner(context); diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index 0c49dbe7e0f49..4ca0dded3e738 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
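The index-type dispatch in the bincount kernels above branches on the proto enum recovered once from dtype(). A minimal sketch of that dispatch shape; DispatchOnIndexType and RunTyped are placeholders for the real kernel bodies:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/enforce.h"

    // Stand-in for the typed INT32/INT64 code path of the real kernel.
    template <typename IndexT>
    void RunTyped(const paddle::framework::Tensor& ids) { (void)ids; }

    void DispatchOnIndexType(const paddle::framework::Tensor& ids) {
      const auto index_type =
          paddle::framework::TransToProtoVarType(ids.dtype());
      if (index_type == paddle::framework::proto::VarType::INT32) {
        RunTyped<int32_t>(ids);
      } else if (index_type == paddle::framework::proto::VarType::INT64) {
        RunTyped<int64_t>(ids);
      } else {
        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
            "The index dtype must be int32 or int64."));
      }
    }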
*/ #include "paddle/fluid/operators/cast_op.h" #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/float16.h" #ifdef PADDLE_WITH_MLU @@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel { auto &tensor_place = tensor->place(); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(tensor_place)) { - return framework::OpKernelType(tensor->type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), + ctx.device_context()); } #ifdef PADDLE_WITH_MKLDNN @@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel { return true; }; - if (this->CanMKLDNNBeUsed(ctx, tensor->type()) && MKLDNNSupportsCast()) { - return framework::OpKernelType(tensor->type(), ctx.GetPlace(), - framework::DataLayout::kMKLDNN, - framework::LibraryType::kMKLDNN); + if (this->CanMKLDNNBeUsed( + ctx, framework::TransToProtoVarType(tensor->dtype())) && + MKLDNNSupportsCast()) { + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), ctx.GetPlace(), + framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN); } #endif #ifdef PADDLE_WITH_MLU auto src_type = static_cast(ctx.Attr("in_dtype")); auto dst_type = static_cast(ctx.Attr("out_dtype")); if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) { - return framework::OpKernelType(tensor->type(), tensor_place); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), tensor_place); } else { VLOG(3) << "MLU not support cast type: " << framework::DataTypeToString(src_type) << " to type: " << framework::DataTypeToString(dst_type) << ", fallbacking to CPU one!"; - return framework::OpKernelType(tensor->type(), platform::CPUPlace()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), + platform::CPUPlace()); } #endif - return framework::OpKernelType(tensor->type(), tensor_place); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), tensor_place); } }; diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h index 9bf08d072da0f..ee2b280f5ab86 100644 --- a/paddle/fluid/operators/cast_op.h +++ b/paddle/fluid/operators/cast_op.h @@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel { out->mutable_data(dev_ctx.GetPlace(), static_cast(out_dtype)); - auto pt_out_dtype = pten::TransToPtenDataType( + auto pt_out_dtype = framework::TransToPtenDataType( static_cast(out_dtype)); // call new kernel diff --git a/paddle/fluid/operators/cast_op_npu.cc b/paddle/fluid/operators/cast_op_npu.cc index 060bcb78e4a3a..8879a226d29d2 100644 --- a/paddle/fluid/operators/cast_op_npu.cc +++ b/paddle/fluid/operators/cast_op_npu.cc @@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); - if (x->type() == dtype) { + if (framework::TransToProtoVarType(x->dtype()) == dtype) { // NOTE(zhiqiu): NPU cast op may result in wrong value, so // add special case here. VLOG(4) << "cast to same dtype:" << dtype; diff --git a/paddle/fluid/operators/cast_op_xpu.cc b/paddle/fluid/operators/cast_op_xpu.cc index 0ec5ca707a6b5..3337afcd3cb08 100644 --- a/paddle/fluid/operators/cast_op_xpu.cc +++ b/paddle/fluid/operators/cast_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/cast_op.h" #include "paddle/fluid/platform/float16.h" @@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel { out->mutable_data(dev_ctx.GetPlace(), static_cast(out_dtype)); - auto pt_out_dtype = pten::TransToPtenDataType( + auto pt_out_dtype = framework::TransToPtenDataType( static_cast(out_dtype)); // call pten kernel pten::CastKernel( diff --git a/paddle/fluid/operators/class_center_sample_op.cu b/paddle/fluid/operators/class_center_sample_op.cu index 2d7800d9997fc..92bf20d6cf95d 100644 --- a/paddle/fluid/operators/class_center_sample_op.cu +++ b/paddle/fluid/operators/class_center_sample_op.cu @@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device.numel(), - platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum, - comm->comm(), calcu_stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(num_classes_per_device.dtype())), + ncclSum, comm->comm(), calcu_stream)); } #endif diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index d71d6fc39b119..67e668cc70d7c 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -23,6 +23,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -53,7 +54,7 @@ struct FillConstantVisitor { * = nullptr) const { #ifdef PADDLE_WITH_ASCEND_CL if (platform::is_npu_place(dev_ctx_.GetPlace())) { - Tensor tensor_tmp(dtype_); + Tensor tensor_tmp(framework::TransToPtenDataType(dtype_)); tensor_tmp.mutable_data({1}, context_.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, static_cast(value_)); @@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel { void *fused_tensor_ptr = fused_tensor ->Resize(framework::make_ddim({static_cast(numel)})) - .mutable_data(context.GetPlace(), dtype); + .mutable_data(context.GetPlace(), + framework::TransToPtenDataType(dtype)); VLOG(10) << "Fused tensor addr " << fused_tensor_ptr; // Init the continuous space diff --git a/paddle/fluid/operators/collective/allreduce_op.h b/paddle/fluid/operators/collective/allreduce_op.h index 314a91841bebf..4a35d59c6f3e6 100644 --- a/paddle/fluid/operators/collective/allreduce_op.h +++ b/paddle/fluid/operators/collective/allreduce_op.h @@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - int dtype = platform::ToNCCLDataType(in->type()); + int dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); auto* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index 02b10f17da5a3..26fdee200cd84 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int send_numel = x->numel(); - ncclDataType_t dtype = 
platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc b/paddle/fluid/operators/collective/barrier_op.cu.cc index a98a0bf6ab4a9..c59d8315a369e 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); void* recvbuff = out->mutable_data(place); diff --git a/paddle/fluid/operators/collective/broadcast_op.cu.cc b/paddle/fluid/operators/collective/broadcast_op.cu.cc index 04d028536a9b2..0e14b484e766f 100644 --- a/paddle/fluid/operators/collective/broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/broadcast_op.cu.cc @@ -17,6 +17,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( send_recv_buffer, static_cast(in->numel()), - platform::ToNCCLDataType(in->type()), root_dev_id, comm, stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())), + root_dev_id, comm, stream)); VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")" << " From " << root_dev_id << " to " << dev_id; diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index f174473c049ec..89854999c16fc 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int nranks = ctx.Attr("nranks"); int rid = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu.cc b/paddle/fluid/operators/collective/c_allgather_op_npu.cc index 5ebcc9064f790..5339293da0fe2 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu.cc @@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel { #if defined(PADDLE_WITH_ASCEND_CL) auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int ring_id = ctx.Attr("ring_id"); std::string group = diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 0e4210ea7304a..a04935d43eb2d 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); void* sendbuff = reinterpret_cast(const_cast(in->data())); @@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { bool found_nan = false; - auto d_type = in->type(); + auto d_type = framework::TransToProtoVarType(in->dtype()); switch (d_type) { case framework::proto::VarType::FP16: { break; @@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - BKCLDataType dtype = platform::ToBKCLDataType(in->type()); + BKCLDataType dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); @@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 6deb837069761..fa4af87ff10e1 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t 
dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int rid = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc index 0860da1362538..70032ac28a046 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc @@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel { auto x = ctx.Input("X"); void* ptr = reinterpret_cast(const_cast(x->data())); int numel = x->numel(); - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); auto out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index 738ed16286131..db5a2317a2d81 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -31,7 +31,8 @@ class CConcatOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int nranks = ctx.Attr("nranks"); int rank = ctx.Attr("rank"); diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu index 9b343b34a3e51..ba50f5c48bd40 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cu +++ b/paddle/fluid/operators/collective/c_embedding_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/collective/c_embedding_op.h" @@ -95,7 +96,7 @@ class CEmbeddingCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(limit); int threads = kNumCUDAThreads; - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { CEmbedding<<>>( output, table, ids_t->data(), K, D, N, start_idx, end_idx, @@ -138,7 +139,7 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel { auto t = framework::EigenVector::Flatten(*d_table_t); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast(0)); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { CEmbeddingGrad<<>>( d_table, d_output, ids_t->data(), K, D, N, start_idx, diff --git a/paddle/fluid/operators/collective/c_embedding_op.h b/paddle/fluid/operators/collective/c_embedding_op.h index 1d410847dd798..6bf99d59fa4a5 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.h +++ b/paddle/fluid/operators/collective/c_embedding_op.h @@ -66,7 +66,7 @@ class CEmbeddingOpCPUKernel : public framework::OpKernel { const int64_t height = table_t->dims()[0]; const int64_t width = table_t->dims()[1]; - const auto& index_type = ids_t->type(); + const auto& index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { GetIdsEmbedding(ids_t->data(), ids_t->numel(), start_idx, table_data, height, width, output_data); @@ -110,9 +110,11 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel { table_grad_t->mutable_data(table_t->dims(), context.GetPlace()); size_t table_t_mem_size = - table_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_t->numel() * framework::DataTypeSize(table_grad_t->dtype()); size_t table_grad_t_mem_size = - table_grad_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_grad_t->numel() * + framework::SizeOfType( + framework::TransToProtoVarType(table_grad_t->dtype())); VLOG(10) << "table_dims:" << table_t->dims() << ", table_t memory_size:" << table_t_mem_size @@ -125,7 +127,7 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel { const int64_t height = table_t->dims()[0]; const int64_t width = table_t->dims()[1]; - const auto& index_type = ids_t->type(); + const auto& index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { UpdateEmbedding(ids_t->data(), ids_t->numel(), start_idx, table_grad_data, height, width, d_output_data); diff --git a/paddle/fluid/operators/collective/c_embedding_op_npu.cc b/paddle/fluid/operators/collective/c_embedding_op_npu.cc index 3e96f15d5d3dd..7a7015b8faaa3 100644 --- a/paddle/fluid/operators/collective/c_embedding_op_npu.cc +++ b/paddle/fluid/operators/collective/c_embedding_op_npu.cc @@ -124,9 +124,10 @@ void NPUGetIdsEmbedding(const framework::ExecutionContext &context) { framework::make_ddim({table_t->dims()[0] + 1, table_t->dims()[1]}); framework::LoDTensor table_t_pad; - size_t mem_size = table_t->numel() * framework::SizeOfType(table_t->type()); + size_t mem_size = + table_t->numel() * framework::DataTypeSize(table_t->dtype()); size_t line_mem_size = - table_t->dims()[1] * framework::SizeOfType(table_t->type()); + table_t->dims()[1] 
* framework::DataTypeSize(table_t->dtype()); PADDLE_ENFORCE_EQ(line_mem_size % 64, 0, platform::errors::InvalidArgument( "NPU only accept the second dim must align by 64")); @@ -160,7 +161,7 @@ class CEmbeddingNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { auto *ids_t = context.Input("Ids"); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { NPUGetIdsEmbedding(context); } else { @@ -200,7 +201,9 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { uint8_t *pad_data = reinterpret_cast( table_t_pad.mutable_data(pad_shape, context.GetPlace())); size_t table_t_pad_mem_size = - table_t_pad.numel() * framework::SizeOfType(table_t_pad.type()); + table_t_pad.numel() * + framework::SizeOfType( + framework::TransToProtoVarType(table_t_pad.dtype())); platform::NPUMemsetAsync(pad_data, 0, table_t_pad_mem_size, stream, table_t_pad_mem_size); @@ -215,11 +218,11 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { // copy table_t_pad to table_t T *dst = table_grad_t->mutable_data(table_t->dims(), context.GetPlace()); const size_t mem_size = - table_grad_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_grad_t->numel() * framework::DataTypeSize(table_grad_t->dtype()); // check align size_t line_mem_size = - table_grad_t->dims()[1] * framework::SizeOfType(table_grad_t->type()); + table_grad_t->dims()[1] * framework::DataTypeSize(table_grad_t->dtype()); PADDLE_ENFORCE_EQ(line_mem_size % 64, 0, platform::errors::InvalidArgument( "NPU only accept the second dim must align by 64")); @@ -234,7 +237,7 @@ class CEmbeddingGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { auto *ids_t = context.Input("Ids"); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { NPUUpdateEmbedding(context); } else { diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index a0e0f8e92bdde..ad798baf3ce91 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -131,7 +132,8 @@ class CReduceOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); void* sendbuff = reinterpret_cast(const_cast(in->data())); @@ -211,7 +213,8 @@ class CReduceOpXPUKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - BKCLDataType dtype = platform::ToBKCLDataType(in->type()); + BKCLDataType dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); @@ -274,7 +277,8 @@ class CReduceOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 141fa760413b3..9b05e940d4f60 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -47,7 +47,8 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { int64_t recv_numel = in->numel() / nranks; const T* send_buff = in->data(); T* recv_buff = out->data(); - int dtype = platform::ToNCCLDataType(in->type()); + int dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc index 7241f66174d55..efc94ce1eaef9 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc @@ -51,7 +51,8 @@ class CReduceScatterOpAscendKernel : public framework::OpKernel { void* inputPtr = reinterpret_cast(const_cast(in->data())); void* outputPtr = reinterpret_cast(out->data()); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); aclrtStream stream = nullptr; if (ctx.Attr("use_calc_stream")) { diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index 4d4dc0c12af55..7a964c9c69717 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -30,7 +30,8 @@ class CScatterOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int nranks = ctx.Attr("nranks"); int root_id = ctx.Attr("root"); diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu 
b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index 6371d523cfa4a..4f1f1ec651206 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -121,8 +121,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { eigen_logits.maximum(along_axis); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( logits_max_buff, logits_max_buff, logits_max.numel(), - platform::ToNCCLDataType(logits_max.type()), ncclMax, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(logits_max.dtype())), + ncclMax, comm->comm(), stream)); // step 2, obtain logit - logit_max Eigen::DSizes batch_by_one(N, 1); @@ -147,7 +148,7 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); if (label_type == framework::proto::VarType::INT32) { MaskLabelByIndex<<>>( @@ -162,8 +163,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { void* predict_logits_buff = predicted_logits.mutable_data(place); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( predict_logits_buff, predict_logits_buff, predicted_logits.numel(), - platform::ToNCCLDataType(predicted_logits.type()), ncclSum, - comm->comm(), stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(predicted_logits.dtype())), + ncclSum, comm->comm(), stream)); // step 4, obtain exp(logit) eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp(); @@ -180,8 +182,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( sum_exp_logits_buff, sum_exp_logits_buff, sum_exp_logits.numel(), - platform::ToNCCLDataType(sum_exp_logits.type()), ncclSum, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(sum_exp_logits.dtype())), + ncclSum, comm->comm(), stream)); auto eigen_loss = math::EigenMatrix::From(loss_2d); auto eigen_predicted_logits = math::EigenMatrix::From(predicted_logits); @@ -225,7 +228,7 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N * D); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); const int start_index = rank * D; const int end_index = start_index + D; diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index e2ff823420aef..0f419a6aa52ab 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -30,8 +31,10 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto local_count = ctx.Input("local_count"); auto global_count = ctx.Input("global_count"); - auto local_count_type = local_count->type(); - auto global_count_type = global_count->type(); + auto local_count_type = + framework::TransToProtoVarType(local_count->dtype()); + auto global_count_type = + framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { PADDLE_THROW(platform::errors::InvalidArgument( "Please use int64 type in local_count.")); @@ -65,7 +68,8 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel { cpu_global_count_data = cpu_global_count.data(); } - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index c47d27366c5f2..7a42c9abcb352 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -30,8 +31,10 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto local_count = ctx.Input("local_count"); auto global_count = ctx.Input("global_count"); - auto local_count_type = local_count->type(); - auto global_count_type = global_count->type(); + auto local_count_type = + framework::TransToProtoVarType(local_count->dtype()); + auto global_count_type = + framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { PADDLE_THROW(platform::errors::InvalidArgument( "Please use int64 type in local_count.")); @@ -63,7 +66,8 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel { global_count_len = cpu_global_count.numel(); } - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 094847beca214..4299578c173aa 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -30,7 +30,8 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); int64_t numel = in->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int nranks = ctx.Attr("nranks"); int rank = ctx.Attr("rank"); diff --git a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc index 
1ac020838a3f8..0314bb7d5de1d 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc @@ -29,7 +29,8 @@ class CallPartialGatherOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); int64_t numel = in->numel(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int rank = ctx.Attr("rank"); int ring_id = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/partial_recv_op_npu.cc b/paddle/fluid/operators/collective/partial_recv_op_npu.cc index bbdf14b1bdc00..f14ce5f81f905 100644 --- a/paddle/fluid/operators/collective/partial_recv_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op_npu.cc @@ -35,7 +35,8 @@ class PartialRecvOpASCENDKernel : public framework::OpKernel { void* ptr = reinterpret_cast(const_cast(out->data()) + offset); int numel = recv_numel; - HcclDataType dtype = platform::ToHCCLDataType(out->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(out->dtype())); int ring_id = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index 8a4f7f750a15b..f6b4d85c9d593 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -70,7 +71,8 @@ class PartialSendCUDAKernel : public framework::OpKernel { "be less than comm->nranks (%d).", peer, comm->nranks())); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int send_numel = numel / num; int offset = send_numel * id; diff --git a/paddle/fluid/operators/collective/partial_send_op_npu.cc b/paddle/fluid/operators/collective/partial_send_op_npu.cc index 1918e6c16ff25..31c74fcc196be 100644 --- a/paddle/fluid/operators/collective/partial_send_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_send_op_npu.cc @@ -33,7 +33,8 @@ class PartialSendOpASCENDKernel : public framework::OpKernel { void* ptr = reinterpret_cast(const_cast(x->data()) + offset); int numel = send_numel; - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu.cc b/paddle/fluid/operators/collective/recv_v2_op_npu.cc index c20aa76ea40d5..6a2244b91025a 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu.cc @@ -31,7 +31,8 @@ class CRecvOpASCENDKernel : public framework::OpKernel { out->mutable_data(out->dims(), ctx.GetPlace()); void* ptr = reinterpret_cast(const_cast(out->data())); int numel = out->numel(); - HcclDataType dtype = platform::ToHCCLDataType(out->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(out->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git 
a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 952fcf2065d59..95eadb728caed 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -61,7 +61,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; auto& x = x_array.at(idx); int numel = x.numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x.type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x.dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend( x.data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " send " @@ -72,7 +73,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend( x->data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " send " diff --git a/paddle/fluid/operators/collective/send_v2_op_npu.cc b/paddle/fluid/operators/collective/send_v2_op_npu.cc index 86d86a01354b2..3bc5487371bac 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu.cc @@ -30,7 +30,8 @@ class CSendOpASCENDKernel : public framework::OpKernel { auto x = ctx.Input("X"); void* ptr = reinterpret_cast(const_cast(x->data())); int numel = x->numel(); - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 2746f0345302f..5d6f5f4d8593d 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -76,7 +76,7 @@ class ConcatOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index 6325d1ccda8c8..1a6e29f3ac392 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -61,13 +61,15 @@ class ConcatMLUKernel : public framework::OpKernel { std::vector desc_vector; for (size_t i = 0; i < ins_size; i++) { input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type()))); + *ins[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(ins[i]->dtype())))); desc_vector.push_back(input_descs.back().get()); inputs.push_back(GetBasePtr(ins[i])); } // init out tensors - MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); + MLUCnnlTensorDesc output_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); // MLU should do sth MLUCnnl::Concat(ctx, ins_size_t, axis_t, desc_vector.data(), inputs.data(), diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index f961e479ce47c..68720e70b09ad 100644 --- 
a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -242,7 +242,7 @@ class ConditionalBlockGradOp : public ConditionalOp { } VLOG(4) << "Assigning zero to " << outside_tensor; outside_tensor->Resize(input_tensor.dims()); - outside_tensor->mutable_data(place, input_tensor.type()); + outside_tensor->mutable_data(place, input_tensor.dtype()); const platform::DeviceContext *dev_ctx = platform::DeviceContextPool::Instance().Get(place); pten::funcs::set_constant(*dev_ctx, outside_tensor, 0.0f); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index a44cff555f0a1..c024e4a12cd47 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -65,7 +65,8 @@ class ConditionalOp : public framework::OperatorBase { platform::errors::InvalidArgument( "condition should have one initialized input as condition")); - PADDLE_ENFORCE_EQ(ips[0]->type() == framework::proto::VarType::BOOL && + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(ips[0]->dtype()) == + framework::proto::VarType::BOOL && ips[0]->numel() == 1, true, platform::errors::InvalidArgument( "condition input's data type should be bool, " diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index 8e46c7acf0918..0727895935525 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -169,7 +169,7 @@ class ReadFromArrayOp : public ArrayOp { auto &fw_var_tensor = fw_var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = fw_var_tensor.type(); + attrs["dtype"] = framework::TransToProtoVarType(fw_var_tensor.dtype()); attrs["shape"] = framework::vectorize(fw_var_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 6fa309f0e2d90..67aabe758810a 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -375,7 +375,8 @@ class WhileGradOp : public framework::OperatorBase { var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = inside_tensor.type(); + attrs["dtype"] = + framework::TransToProtoVarType(inside_tensor.dtype()); attrs["shape"] = framework::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index e500814232aae..d8b54d4f95c0d 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -194,7 +194,8 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( if (input_data_type != framework::proto::VarType::INT8 && input_data_type != framework::proto::VarType::UINT8 && input_data_type != framework::proto::VarType::BF16) { - auto filter_data_type = ctx.Input("Filter")->type(); + auto filter_data_type = + framework::TransToProtoVarType(ctx.Input("Filter")->dtype()); PADDLE_ENFORCE_EQ( input_data_type, filter_data_type, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index fb22765d76ea6..956ba80f32bd1 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -235,8 +235,8 @@ class GemmConvKernel : public framework::OpKernel 
{ const std::string data_format = context.Attr("data_format"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - Tensor transformed_input(input->type()); - Tensor transformed_output(output->type()); + Tensor transformed_input(input->dtype()); + Tensor transformed_output(output->dtype()); if (channel_last) { ResizeToChannelFirst(context, input, @@ -398,8 +398,8 @@ class GemmConvGradKernel : public framework::OpKernel { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - Tensor transformed_input(input->type()); - Tensor transformed_output_grad(output_grad->type()); + Tensor transformed_input(input->dtype()); + Tensor transformed_output_grad(output_grad->dtype()); if (channel_last) { ResizeToChannelFirst(context, input, @@ -490,7 +490,7 @@ class GemmConvGradKernel : public framework::OpKernel { if (input_grad) { input_grad->mutable_data(context.GetPlace()); - Tensor transformed_input_grad(input_grad->type()); + Tensor transformed_input_grad(input_grad->dtype()); if (channel_last) { ResizeToChannelFirst(context, input_grad, &transformed_input_grad); @@ -616,9 +616,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); // transform Tensor - Tensor transformed_X(X->type()); - Tensor transformed_dY(dY->type()); - Tensor transformed_ddX(X->type()); + Tensor transformed_X(X->dtype()); + Tensor transformed_dY(dY->dtype()); + Tensor transformed_ddX(X->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, X, &transformed_X); @@ -703,7 +703,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape); dX->mutable_data(ctx.GetPlace()); - Tensor transformed_dX(dX->type()); + Tensor transformed_dX(dX->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, dX, &transformed_dX); @@ -794,7 +794,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { if (ddY) { ddY->mutable_data(ctx.GetPlace()); - Tensor transformed_ddY(ddY->type()); + Tensor transformed_ddY(ddY->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, ddY, &transformed_ddY); } else { diff --git a/paddle/fluid/operators/conv_op_mlu.cc b/paddle/fluid/operators/conv_op_mlu.cc index 88698c02dd5da..2155de4d05ecb 100644 --- a/paddle/fluid/operators/conv_op_mlu.cc +++ b/paddle/fluid/operators/conv_op_mlu.cc @@ -80,12 +80,14 @@ class MLUConvOpKernel : public framework::OpKernel { true /*need_reshape_or_alloc*/); cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc(input_tensor, data_layout, - ToCnnlDataType(input_tensor.type())); + MLUCnnlTensorDesc input_desc( + input_tensor, data_layout, + ToCnnlDataType(framework::TransToProtoVarType(input_tensor.dtype()))); MLUCnnlTensorDesc filter_desc(trans_filter, data_layout, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_desc(output_tensor, data_layout, - ToCnnlDataType(output_tensor.type())); + MLUCnnlTensorDesc output_desc( + output_tensor, data_layout, + ToCnnlDataType(framework::TransToProtoVarType(output_tensor.dtype()))); MLUCnnlConvolutionDesc conv_desc(in_dims_size, paddings.data(), strides.data(), dilations.data(), groups, diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index a2e6ff214bfa3..ffb9ffcf98c91 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -104,9 +104,11 @@ class CorrelationOp : public framework::OperatorWithKernel { const 
framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ(input_data_type, ctx.Input("Input2")->type(), - platform::errors::InvalidArgument( - "X and Y shoule have the same datatype")); + PADDLE_ENFORCE_EQ( + input_data_type, + framework::TransToProtoVarType(ctx.Input("Input2")->dtype()), + platform::errors::InvalidArgument( + "X and Y shoule have the same datatype")); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/crop_op_npu.cc b/paddle/fluid/operators/crop_op_npu.cc index 013ad5dd8cb86..782fb10bc5368 100644 --- a/paddle/fluid/operators/crop_op_npu.cc +++ b/paddle/fluid/operators/crop_op_npu.cc @@ -80,7 +80,7 @@ class CropNPUKernel : public framework::OpKernel { "Input(shape) should be equal to the shape of dims " "(%d) of the Input(X).", shape_size.size(), x->dims().size())); - Tensor tmp_shape(x->type()); + Tensor tmp_shape(x->dtype()); tmp_shape.Resize(framework::make_ddim(shape_size)); tmp_shape.mutable_data(ctx.GetPlace()); const auto& runner = diff --git a/paddle/fluid/operators/cumsum_op_npu.cc b/paddle/fluid/operators/cumsum_op_npu.cc index ea0b3085704bc..24b6fb777e9a3 100644 --- a/paddle/fluid/operators/cumsum_op_npu.cc +++ b/paddle/fluid/operators/cumsum_op_npu.cc @@ -27,10 +27,12 @@ static void CumsumImp(const Tensor& input, Tensor* output, auto stream = ctx.template device_context() .stream(); - if (input.type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::INT64) { Tensor tmp_input; tmp_input.mutable_data(input.dims(), ctx.GetPlace()); - auto dst_acl_dtype = ConvertToNpuDtype(tmp_input.type()); + auto dst_acl_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type())); const auto& cast_runner_1 = NpuOpRunner("Cast", {input}, {tmp_input}, {{"dst_type", static_cast(dst_acl_dtype)}}); @@ -42,7 +44,8 @@ static void CumsumImp(const Tensor& input, Tensor* output, NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input); runner.Run(stream); - dst_acl_dtype = ConvertToNpuDtype(output->type()); + dst_acl_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(output->type())); const auto& cast_runner_2 = NpuOpRunner("Cast", {tmp_output}, {*output}, {{"dst_type", static_cast(dst_acl_dtype)}}); diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 5d157a77b3dd1..28a7922120139 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -179,15 +179,18 @@ class DataNormGradKernel PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_size), reinterpret_cast(d_batch_size), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), + ncclSum, comm->comm(), stream)); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_sum), reinterpret_cast(d_batch_sum), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), + ncclSum, comm->comm(), stream)); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_square_sum), reinterpret_cast(d_batch_square_sum), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), 
+ ncclSum, comm->comm(), stream)); platform::GpuStreamSync(stream); #else PADDLE_THROW(platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/operators/decode_jpeg_op.cc b/paddle/fluid/operators/decode_jpeg_op.cc index dd82c74885b94..d0142244aaeb1 100644 --- a/paddle/fluid/operators/decode_jpeg_op.cc +++ b/paddle/fluid/operators/decode_jpeg_op.cc @@ -73,8 +73,9 @@ class DecodeJpegOp : public framework::OperatorWithKernel { return expected_kernel_type; } - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } }; diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index 18c45a1a4c6c1..7bbbbe7f40ecc 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" @@ -146,7 +147,7 @@ static void AppendProposals(framework::Tensor* dst, int64_t offset, const framework::Tensor& src) { auto* out_data = dst->data(); auto* to_add_data = src.data(); - size_t size_of_t = framework::SizeOfType(src.type()); + size_t size_of_t = framework::DataTypeSize(src.dtype()); offset *= size_of_t; std::memcpy( reinterpret_cast(reinterpret_cast(out_data) + offset), diff --git a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc index a5b2133847c60..e2989bdb31b48 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc @@ -43,7 +43,8 @@ struct DensityPriorBoxFunction { runner.Run(stream); } void Cast(const Tensor* x, Tensor* y) { - auto dst_dtype = ConvertToNpuDtype(y->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(y->type())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); @@ -121,7 +122,7 @@ struct DensityPriorBoxFunction { template <> void DensityPriorBoxFunction::Arange(int n, Tensor* x) { - Tensor x_fp32(framework::proto::VarType::FP32); + Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); @@ -132,7 +133,7 @@ void DensityPriorBoxFunction::Arange(int n, Tensor* x) { template <> void DensityPriorBoxFunction::FloatVec2Tsr(const std::vector& vec, Tensor* tsr_dst) { - Tensor tsr_fp32(framework::proto::VarType::FP32); + Tensor tsr_fp32(experimental::DataType::FLOAT32); tsr_fp32.mutable_data(tsr_dst->dims(), place); framework::TensorFromVector(vec, ctx.device_context(), &tsr_fp32); ctx.template device_context().Wait(); @@ -164,7 +165,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { int layer_w = input->dims()[3]; int layer_h = input->dims()[2]; - auto _type = input->type(); + auto _type = input->dtype(); auto place = ctx.GetPlace(); DensityPriorBoxFunction F(ctx); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc index c76ee58c7fa9e..4709120027c58 100644 --- 
a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc @@ -85,7 +85,7 @@ class IouSimilarityNPUKernel : public framework::OpKernel { bool normalized = ctx.Attr("box_normalized"); auto* out = ctx.Output("Out"); - auto _type = x->type(); + auto _type = x->dtype(); auto place = ctx.GetPlace(); IouFunction F(ctx); diff --git a/paddle/fluid/operators/detection/prior_box_op.cc b/paddle/fluid/operators/detection/prior_box_op.cc index cf19e2411090a..624d3c87cd9b1 100644 --- a/paddle/fluid/operators/detection/prior_box_op.cc +++ b/paddle/fluid/operators/detection/prior_box_op.cc @@ -19,6 +19,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -101,7 +102,8 @@ class PriorBoxOp : public framework::OperatorWithKernel { this->CanMKLDNNBeUsed(ctx, input_input_type)) { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; - auto input_image_type = ctx.Input("Image")->type(); + auto input_image_type = framework::TransToProtoVarType( + ctx.Input("Image")->dtype()); int customized_type_value = framework::OpKernelType::kDefaultCustomizedTypeValue; if (input_image_type == framework::DataTypeTrait::DataType()) { diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h index 94c8513086c20..2b579900267fd 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h @@ -196,7 +196,7 @@ class DlnneEngineOp : public framework::OperatorBase { index++; int64_t data_bytes; int32_t dtype; - auto type = t.type(); + auto type = framework::TransToProtoVarType(t.dtype()); data_bytes = 1; void *buffer = nullptr; if (type == framework::proto::VarType::FP32) { diff --git a/paddle/fluid/operators/dropout_op_npu.cc b/paddle/fluid/operators/dropout_op_npu.cc index 98a38a07dadaa..b5919459f078d 100644 --- a/paddle/fluid/operators/dropout_op_npu.cc +++ b/paddle/fluid/operators/dropout_op_npu.cc @@ -56,8 +56,8 @@ class DropoutNPUKernel : public framework::OpKernel { // only achive the default `upscale_in_train` method if (!is_test) { - Tensor tmp_x(x->type()); - Tensor tmp_out(out->type()); + Tensor tmp_x(x->dtype()); + Tensor tmp_out(out->dtype()); tmp_x.ShareDataWith(*x); tmp_out.ShareDataWith(*out); if (x->dims().size() == 1) { @@ -80,7 +80,7 @@ class DropoutNPUKernel : public framework::OpKernel { seed = ctx.Attr("fix_seed") ? ctx.Attr("seed") : 0; } - Tensor keep_prob_tensor(x->type()); + Tensor keep_prob_tensor(x->dtype()); keep_prob_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&keep_prob_tensor, static_cast(keep_prob)); @@ -89,7 +89,7 @@ class DropoutNPUKernel : public framework::OpKernel { // mask used in `DropOutGenMask` NPU OP is different from // the output `Mask`. 
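The dropout NPU hunk below follows a pattern that recurs across the NPU kernels in this patch: scratch tensors are now constructed with a pten experimental::DataType, and the ACL dtype is derived by first bridging back to proto::VarType through framework::TransToProtoVarType. A minimal sketch of that two-step conversion, assuming the fluid and NPU op-runner headers these kernels already include (the helper name CastToUint8Sketch is illustrative only, not part of the patch):

#include "paddle/fluid/framework/convert_utils.h"
// NpuOpRunner and aclrtStream come from the NPU support headers already
// included by the kernels in this file (paths assumed, not repeated here).

namespace paddle {
namespace operators {

// Illustrative helper: cast `src` into a uint8 scratch tensor on the NPU.
inline framework::Tensor CastToUint8Sketch(const framework::Tensor& src,
                                           const platform::Place& place,
                                           aclrtStream stream) {
  // New style: construct the scratch tensor with a pten DataType ...
  framework::Tensor tmp(experimental::DataType::UINT8);
  tmp.Resize(src.dims());
  tmp.mutable_data(place, tmp.dtype());
  // ... and bridge back to proto::VarType only where a legacy API needs it.
  auto dst_dtype =
      ConvertToNpuDtype(framework::TransToProtoVarType(tmp.dtype()));
  const auto& runner =
      NpuOpRunner("Cast", {src}, {tmp},
                  {{"dst_type", static_cast<int>(dst_dtype)}});
  runner.Run(stream);
  return tmp;
}

}  // namespace operators
}  // namespace paddle
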
- Tensor npu_mask(framework::proto::VarType::UINT8); + Tensor npu_mask(experimental::DataType::UINT8); uint32_t length = (x->numel() + 128 - 1) / 128 * 128; npu_mask.Resize(framework::make_ddim({length / 8})); npu_mask.mutable_data(ctx.GetPlace()); @@ -116,17 +116,19 @@ class DropoutNPUKernel : public framework::OpKernel { runner_dropout.Run(stream); // cast `out` from float/float16 to bool - Tensor cast_mask(framework::proto::VarType::BOOL); + Tensor cast_mask(experimental::DataType::BOOL); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); - auto dst_dtype_bool = ConvertToNpuDtype(cast_mask.type()); + auto dst_dtype_bool = + ConvertToNpuDtype(framework::TransToProtoVarType(cast_mask.dtype())); const auto& runner_cast_mask_bool = NpuOpRunner("Cast", {*out}, {cast_mask}, {{"dst_type", static_cast(dst_dtype_bool)}}); runner_cast_mask_bool.Run(stream); // cast cast_mask from bool to uint8 - auto dst_dtype_uint8 = ConvertToNpuDtype(mask->type()); + auto dst_dtype_uint8 = + ConvertToNpuDtype(framework::TransToProtoVarType(mask->dtype())); const auto& runner_cast_mask_uint8 = NpuOpRunner("Cast", {cast_mask}, {*mask}, {{"dst_type", static_cast(dst_dtype_uint8)}}); @@ -167,10 +169,11 @@ class DropoutGradNPUKernel : public framework::OpKernel { } // cast mask from uint8 to float32/float16 - Tensor cast_mask(dx->type()); + Tensor cast_mask(dx->dtype()); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(dx->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dx->dtype())); const auto& runner_cast_mask = NpuOpRunner("Cast", {*mask}, {cast_mask}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index 4dd5b7cfd8499..b4b6e2ce2fc56 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -189,7 +189,7 @@ class EigKernel : public framework::OpKernel { auto* out_values = context.Output("Eigenvalues"); auto* out_vectors = context.Output("Eigenvectors"); - if (!framework::IsComplexType(x->type())) { + if (!framework::IsComplexType(framework::TransToProtoVarType(x->dtype()))) { out_values->mutable_data(context.GetPlace()); out_vectors->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/eigvals_op.h b/paddle/fluid/operators/eigvals_op.h index 6fdf849ac7613..d825833b02422 100644 --- a/paddle/fluid/operators/eigvals_op.h +++ b/paddle/fluid/operators/eigvals_op.h @@ -110,7 +110,8 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, static_cast(NULL), &info); std::string name = "framework::platform::dynload::dgeev_"; - if (input.type() == framework::proto::VarType::FP64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::FP64) { name = "framework::platform::dynload::sgeev_"; } CheckLapackEigResult(info, name); @@ -159,7 +160,8 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, rwork->template data>(), &info); std::string name = "framework::platform::dynload::cgeev_"; - if (input.type() == framework::proto::VarType::COMPLEX64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::COMPLEX64) { name = "framework::platform::dynload::zgeev_"; } CheckLapackEigResult(info, name); @@ -203,7 +205,8 @@ class EigvalsKernel : public framework::OpKernel { lwork = 3 * n_dim; work.mutable_data(framework::make_ddim({lwork}), ctx.GetPlace()); } - if (framework::IsComplexType(input->type())) { 
+ if (framework::IsComplexType( + framework::TransToProtoVarType(input->dtype()))) { rwork.mutable_data>(framework::make_ddim({n_dim << 1}), ctx.GetPlace()); } diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index 791e548372fd1..493dcf72923f3 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -196,8 +196,9 @@ class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc index a697d9e007c0c..fb9dbc7fc825c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc @@ -85,13 +85,13 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {}); runner_tensor_zeros.Run(stream); - Tensor x_zero(paddle::framework::proto::VarType::BOOL); + Tensor x_zero(experimental::DataType::BOOL); x_zero.mutable_data(x->dims(), place); const auto& runner_x_zero = NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {}); runner_x_zero.Run(stream); - Tensor x_nozero(paddle::framework::proto::VarType::BOOL); + Tensor x_nozero(experimental::DataType::BOOL); x_nozero.mutable_data(x->dims(), place); const auto& runner_x_nonzero = NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {}); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 40faf7cbbe8cd..1262562ad521f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -48,8 +48,9 @@ class ElementwiseMulOp : public ElementwiseOp { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index 0c04f7b360e30..cdd1a9b976bd6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -158,8 +158,9 @@ class ElementwiseOp : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + 
framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { #ifdef PADDLE_WITH_MKLDNN // When elementwise is first oneDNN op (there was some non oneDNN op @@ -346,8 +347,9 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -396,8 +398,9 @@ class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -449,8 +452,9 @@ class ElementwiseOpDoubleGradWithoutDXDY const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -506,8 +510,9 @@ class ElementwiseOpTripleGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc index 6c51df5c61ef3..9305f42021192 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
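Every elementwise operator touched above overrides GetKernelTypeForVar with the same promotion rule; only the way the proto data type is obtained changes. A condensed sketch of the post-migration form (written as a free function purely for illustration; in the patch it is a method of each OperatorWithKernel subclass):

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

// Promote to the input's own type when the expected kernel type is complex;
// the proto enum now comes from the pten dtype instead of tensor.type().
framework::OpKernelType GetKernelTypeForVarSketch(
    const framework::Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) {
  if (framework::IsComplexType(expected_kernel_type.data_type_)) {
    return framework::OpKernelType(
        framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
        tensor.layout());
  }
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

}  // namespace operators
}  // namespace paddle
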
+#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" namespace paddle { @@ -41,9 +42,11 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel { auto* dy = ctx.Output(framework::GradVarName("Y")); auto tz = paddle::framework::vectorize(dout->dims()); - memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, - onednn_engine); + memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler handler( + tz, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto reorder_src_memory_p = handler.AcquireSrcMemory( diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc index e7bb73340b841..642ee1feb7a5d 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" namespace paddle { namespace framework { @@ -41,9 +42,11 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel { auto* dy = ctx.Output(framework::GradVarName("Y")); auto tz = framework::vectorize(dout->dims()); - memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, - onednn_engine); + memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler handler( + tz, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto reorder_src_memory_p = handler.AcquireSrcMemory( diff --git a/paddle/fluid/operators/empty_op.h b/paddle/fluid/operators/empty_op.h index 9c91377683870..42c951385a438 100644 --- a/paddle/fluid/operators/empty_op.h +++ b/paddle/fluid/operators/empty_op.h @@ -17,6 +17,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" @@ -37,7 +38,8 @@ class EmptyKernel : public framework::OpKernel { auto shape = GetShape(context); out_tensor->Resize(shape); - out_tensor->mutable_data(context.GetPlace(), dtype); + out_tensor->mutable_data(context.GetPlace(), + framework::TransToPtenDataType(dtype)); } }; diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index 58418d01b81da..d2f8daf732050 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -116,11 +116,13 @@ class ExpandV2NPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); }; - if (X->type() == framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(X->dtype()) == + framework::proto::VarType::BOOL) { NpuOpRunner::TypeAdapter({*X}, {*Out}, attr_input, dev_ctx, op_func, {framework::proto::VarType::UINT8}, {framework::proto::VarType::UINT8}); - } else if (X->type() == framework::proto::VarType::INT64) { + 
} else if (framework::TransToProtoVarType(X->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter({*X}, {*Out}, attr_input, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); @@ -151,8 +153,8 @@ class ExpandV2NPUGradKernel : public framework::OpKernel { axes.push_back(i); } - Tensor tmp_dout(dout->type()); - Tensor reduced_dout(dx->type()); + Tensor tmp_dout(dout->dtype()); + Tensor reduced_dout(dx->dtype()); tmp_dout.ShareDataWith(*dout); if (axes.size() != 0) { std::vector reduced_dout_dims; diff --git a/paddle/fluid/operators/fill_any_like_op_npu.cc b/paddle/fluid/operators/fill_any_like_op_npu.cc index fc52d2aed2cce..bf00e7104223d 100644 --- a/paddle/fluid/operators/fill_any_like_op_npu.cc +++ b/paddle/fluid/operators/fill_any_like_op_npu.cc @@ -54,7 +54,7 @@ class FillAnyLikeNPUKernel : public framework::OpKernel { std::isnan(value), false, platform::errors::InvalidArgument("The filled value is NaN.")); - Tensor tensor_tmp(data_type); + Tensor tensor_tmp(framework::TransToPtenDataType(data_type)); tensor_tmp.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index ed3a6618977f5..8e4aa9fdd69ce 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/pten/kernels/funcs/math_function.h" @@ -61,7 +62,8 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); pten::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), data_type); + out->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } @@ -69,7 +71,8 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { if (!cpu_place) { auto &dev_ctx = *pool.Get(ctx.GetPlace()); pten::funcs::SetConstant functor; - out->mutable_data(ctx.GetPlace(), data_type); + out->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 98e03ea66d852..a65f2708950f0 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -71,12 +71,14 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); pten::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), data_type); + out->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } else { - out->mutable_data(ctx.GetPlace(), data_type); - Tensor tensor_tmp(data_type); + out->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); + Tensor tensor_tmp(framework::TransToPtenDataType(data_type)); tensor_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, value); diff --git 
a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index 15c9241275d10..90aa5aabb82da 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" @@ -120,14 +121,16 @@ class FillConstantKernel : public framework::OpKernel { VLOG(4) << "[CPU] FillConstantKernel" << ((data_type == framework::proto::VarType::BF16) ? "" : ""); - tensor->mutable_data(platform::CPUPlace(), data_type); + tensor->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CPUPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); } else if (actual_place == 1) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - tensor->mutable_data(ctx.GetPlace(), data_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), @@ -138,7 +141,8 @@ class FillConstantKernel : public framework::OpKernel { #endif } else if (actual_place == 2) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - tensor->mutable_data(platform::CUDAPinnedPlace(), data_type); + tensor->mutable_data(platform::CUDAPinnedPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CUDAPinnedPlace()); functor( @@ -150,7 +154,8 @@ class FillConstantKernel : public framework::OpKernel { #endif } else if (actual_place == 3) { #ifdef PADDLE_WITH_XPU - tensor->mutable_data(ctx.GetPlace(), data_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), diff --git a/paddle/fluid/operators/fill_constant_op_mlu.cc b/paddle/fluid/operators/fill_constant_op_mlu.cc index c265977715ff1..609af914d4df6 100644 --- a/paddle/fluid/operators/fill_constant_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_op_mlu.cc @@ -72,8 +72,9 @@ class FillConstantMLUKernel : public framework::OpKernel { auto shape = GetShape(ctx); out_var->mutable_data(shape, ctx.GetPlace()); - MLUCnnlTensorDesc output_desc(*out_var, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out_var->type())); + MLUCnnlTensorDesc output_desc( + *out_var, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out_var->dtype()))); MLUCnnl::Fill(ctx, value, output_desc.get(), GetBasePtr(out_var)); } }; diff --git a/paddle/fluid/operators/fill_constant_op_npu.cc b/paddle/fluid/operators/fill_constant_op_npu.cc index d7bd25683ec06..2d02faafead26 100644 --- a/paddle/fluid/operators/fill_constant_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_op_npu.cc @@ -61,7 +61,7 @@ class FillConstantNPUKernel : public framework::OpKernel { out_var->mutable_data(shape, ctx.GetPlace()); if (data_type != framework::proto::VarType::BOOL) { - Tensor tensor_value(data_type); + Tensor tensor_value(framework::TransToPtenDataType(data_type)); tensor_value.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_value, value); NpuOpRunner runner; diff --git a/paddle/fluid/operators/fill_diagonal_op.cc 
b/paddle/fluid/operators/fill_diagonal_op.cc index be3239d504844..edc5fdd6f991e 100644 --- a/paddle/fluid/operators/fill_diagonal_op.cc +++ b/paddle/fluid/operators/fill_diagonal_op.cc @@ -138,8 +138,8 @@ class FillIDiagonalGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { // Note: don't get data type from ctx.Input("Input"); - auto dtype = - ctx.Input(framework::GradVarName("Out"))->type(); + auto dtype = framework::TransToProtoVarType( + ctx.Input(framework::GradVarName("Out"))->type()); return framework::OpKernelType(dtype, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fill_diagonal_tensor_op.cc b/paddle/fluid/operators/fill_diagonal_tensor_op.cc index 0c348693202e1..b4ab7c22c861c 100644 --- a/paddle/fluid/operators/fill_diagonal_tensor_op.cc +++ b/paddle/fluid/operators/fill_diagonal_tensor_op.cc @@ -187,7 +187,8 @@ class FillDiagonalTensorGradOp : public framework::OperatorWithKernel { // Note: don't get data type from ctx.Input("Input"); auto dtype = ctx.Input(framework::GradVarName("Out"))->type(); - return framework::OpKernelType(dtype, ctx.GetPlace()); + return framework::OpKernelType(framework::TransToProtoVarType(dtype), + ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fill_op.h b/paddle/fluid/operators/fill_op.h index 02e388bcb40f8..8cf4210da4761 100644 --- a/paddle/fluid/operators/fill_op.h +++ b/paddle/fluid/operators/fill_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" @@ -48,9 +49,10 @@ class FillKernel : public framework::OpKernel { out.Resize(framework::make_ddim(ctx.Attr>("shape"))); auto dtype = static_cast(ctx.Attr("dtype")); + auto pten_dtype = framework::TransToPtenDataType(dtype); platform::CPUPlace cpu; auto force_cpu = ctx.Attr("force_cpu"); - out.mutable_data(force_cpu ? cpu : ctx.GetPlace(), dtype); + out.mutable_data(force_cpu ? cpu : ctx.GetPlace(), pten_dtype); framework::LoDTensor tensor; @@ -59,7 +61,7 @@ class FillKernel : public framework::OpKernel { } else { // Always make tensor in CPU memory. 
tensor.Resize(out.dims()); - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, pten_dtype); } framework::VisitDataType( diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index 38326e7560c0d..90a50497f78aa 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -66,8 +66,8 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { const std::string padding_algorithm = ctx.Attr("padding_algorithm"); - Tensor transformed_input_channel(input->type()); - Tensor transformed_output(output->type()); + Tensor transformed_input_channel(input->dtype()); + Tensor transformed_output(output->dtype()); transformed_input_channel = *input; transformed_output = *output; T* output_data = transformed_output.data(); diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 39c7e52cc465c..d141800d61c0e 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -163,7 +163,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -453,7 +453,7 @@ class FusedAttentionGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index e9ad2895e03db..072eb52a8aa4f 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -139,16 +140,20 @@ framework::OpKernelType FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::PreconditionNotMet( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::PreconditionNotMet( "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Mean")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), platform::errors::PreconditionNotMet( "Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), platform::errors::PreconditionNotMet( "Variance input should be of float type")); diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index e825ad30782ad..40b31559fe112 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -166,9 +166,9 @@ class FusedBatchNormActKernel /*xDesc=*/data_desc_, /*sizeInBytes=*/&reserve_space_size)); - reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->dtype(), reserve_space_size); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationForwardTrainingEx( diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index 9f446b48b4728..b5870c6d906ce 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -120,10 +120,12 @@ framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( auto bn_param_type = framework::proto::VarType::FP32; PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Scale")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Bias")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index c5bc5b1725516..12768fa83a7df 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -144,9 +144,9 @@ class FusedBatchNormAddActKernel /*xDesc=*/data_desc_, /*sizeInBytes=*/&reserve_space_size)); - reserve_space_ptr = 
reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->dtype(), reserve_space_size); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationForwardTrainingEx( @@ -277,7 +277,7 @@ class FusedBatchNormAddActGradKernel /*activationDesc=*/activation_desc_, /*sizeInBytes=*/&workspace_size)); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationBackwardEx( diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index d51e0de38009b..3e69bf0806756 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -159,8 +159,8 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.Input("X")->type(), - ctx.Input("Y")->type(), + PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), + ctx.Input("Y")->dtype(), platform::errors::InvalidArgument( "The element's type of input should be the same.")); return framework::OpKernelType( diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc index 4d270280d389c..a72ab1d1b144c 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc @@ -97,7 +97,7 @@ class EmbeddingEltWiseLayerNormOp : public framework::OperatorWithKernel { bool flag = 0; for (auto* input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu index 14a6608836a8a..c2c617b8d5238 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu @@ -14,6 +14,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/math/bert_encoder_functor.h" @@ -32,8 +33,10 @@ class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel { auto embs = context.MultiInput("Embs"); int input_num = static_cast(ids.size()); - framework::Tensor in_ids_(framework::proto::VarType::INT64), - in_embs_(framework::proto::VarType::INT64); + framework::Tensor in_ids_( + framework::TransToPtenDataType(framework::proto::VarType::INT64)), + in_embs_( + framework::TransToPtenDataType(framework::proto::VarType::INT64)); framework::DDim in_dim{input_num}; int device_id; #ifdef PADDLE_WITH_HIP diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index 7da790fc5c6e2..1815a4dd71c78 100644 --- 
a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -289,7 +289,7 @@ class FusedFeedForwardOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index f1deab3e65299..c18eee7912723 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -277,13 +278,14 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { std::shared_ptr h0_memory_p, weight_h_memory_p, weight_x_memory_p; - if (weight_h->type() == paddle::framework::proto::VarType_Type_FP32) { + if (framework::TransToProtoVarType(weight_h->dtype()) == + paddle::framework::proto::VarType_Type_FP32) { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x, origin_mode); weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h, origin_mode); - } else if (weight_h->type() == + } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { h0_memory_p = handler.template AcquireH0Memory(h0); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index dfd88248ede34..f3317a1d49554 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_lstm_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -114,7 +115,7 @@ class LSTMMKLDNNHandler void ReorderGates(U* weights, int64_t I) { size_t inner_block_size = this->OC; size_t block_size = inner_block_size * this->G; - for (size_t i = 0; i < (size_t)I; ++i) { + for (size_t i = 0; i < (size_t)I; ++i) { // NOLINT size_t offset = i * block_size; U* base_pos = weights + offset; @@ -341,13 +342,14 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { std::shared_ptr h0_memory_p, weight_h_memory_p, weight_x_memory_p; - if (weight_h->type() == paddle::framework::proto::VarType_Type_FP32) { + if (framework::TransToProtoVarType(weight_h->dtype()) == + paddle::framework::proto::VarType_Type_FP32) { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x); weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h); - } else if (weight_h->type() == + } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { h0_memory_p = handler.template AcquireH0Memory(h0); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 79e813c52181c..debab26aa6937 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -180,10 +180,12 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("ScaleX")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("ScaleX")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("BiasX")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("BiasX")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/gather_nd_op.cu b/paddle/fluid/operators/gather_nd_op.cu index ee51df68c18e5..3a8099c0672ad 100644 --- a/paddle/fluid/operators/gather_nd_op.cu +++ b/paddle/fluid/operators/gather_nd_op.cu @@ -33,7 +33,7 @@ class GatherNdOpCUDAKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -71,7 +71,7 @@ class GatherNdGradOpCUDAKernel : public framework::OpKernel { dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/gather_nd_op.h b/paddle/fluid/operators/gather_nd_op.h index e89db1e8732e2..f458c0e18013b 100644 --- a/paddle/fluid/operators/gather_nd_op.h +++ b/paddle/fluid/operators/gather_nd_op.h @@ 
-38,7 +38,7 @@ class GatherNdOpKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -75,7 +75,7 @@ class GatherNdGradOpKernel : public framework::OpKernel { dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_nd_op_npu.cc b/paddle/fluid/operators/gather_nd_op_npu.cc index 054ccb6e59bab..dd6dcbb8d5b59 100644 --- a/paddle/fluid/operators/gather_nd_op_npu.cc +++ b/paddle/fluid/operators/gather_nd_op_npu.cc @@ -38,7 +38,7 @@ class GatherNdNPUKernel : public framework::OpKernel { return; } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_nd_op_xpu.cc b/paddle/fluid/operators/gather_nd_op_xpu.cc index a86731bba8ab8..0c67ec3f38ef2 100644 --- a/paddle/fluid/operators/gather_nd_op_xpu.cc +++ b/paddle/fluid/operators/gather_nd_op_xpu.cc @@ -32,7 +32,7 @@ class GatherNdXPUKernel : public framework::OpKernel { return; } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_op.cu b/paddle/fluid/operators/gather_op.cu index 6e27d95e01855..19568835a6e96 100644 --- a/paddle/fluid/operators/gather_op.cu +++ b/paddle/fluid/operators/gather_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/gather.cu.h" #include "paddle/fluid/operators/gather_op.h" @@ -38,7 +39,8 @@ class GatherOpCUDAKernel : public framework::OpKernel { Tensor cpu_axis; const Tensor *axis_tensor = ctx.Input("Axis"); framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -46,7 +48,7 @@ class GatherOpCUDAKernel : public framework::OpKernel { } } const auto &place = ctx.GetPlace(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2CUDAFunction(x, index, axis, output, place, ctx); @@ -82,7 +84,8 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { const Tensor *axis_tensor = ctx.Input("Axis"); Tensor cpu_axis; framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -90,7 +93,7 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { } } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2GradCUDAFunction(dO, index, axis, dX, diff --git a/paddle/fluid/operators/gather_op.h b/paddle/fluid/operators/gather_op.h index a2570c3e014e1..016c2b398daaa 100644 --- a/paddle/fluid/operators/gather_op.h +++ b/paddle/fluid/operators/gather_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" @@ -39,7 +40,8 @@ class GatherOpKernel : public framework::OpKernel { // get axis from tensor if (ctx.HasInput("Axis")) { const Tensor *axis_tensor = ctx.Input("Axis"); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(axis_tensor->data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -47,7 +49,7 @@ class GatherOpKernel : public framework::OpKernel { } } const auto &place = ctx.GetPlace(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2Function(x, index, axis, output, place); @@ -82,14 +84,15 @@ class GatherGradientOpKernel : public framework::OpKernel { int axis = ctx.Attr("axis"); if (ctx.HasInput("Axis")) { const Tensor *axis_tensor = ctx.Input("Axis"); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(axis_tensor->data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { axis = static_cast(axis_tensor->data()[0]); } } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { diff --git a/paddle/fluid/operators/gather_op_xpu.cc b/paddle/fluid/operators/gather_op_xpu.cc index d9fdbb2a9dd75..99e4b84bc44ec 100644 --- a/paddle/fluid/operators/gather_op_xpu.cc +++ b/paddle/fluid/operators/gather_op_xpu.cc @@ -64,7 +64,8 @@ class GatherOpXPUKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); int r = XPU_SUCCESS; - if (index->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) == + framework::proto::VarType::INT32) { r = xpu::gather( dev_ctx.x_context(), reinterpret_cast(x->data()), index->data(), reinterpret_cast(output->data()), @@ -129,7 +130,8 @@ class GatherGradOpXPUKernel : public framework::OpKernel { dx->mutable_data(ctx.GetPlace()); int r = XPU_SUCCESS; - if (index->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) == + framework::proto::VarType::INT32) { r = xpu::gather_grad( dev_ctx.x_context(), reinterpret_cast(dout->data()), diff --git a/paddle/fluid/operators/gaussian_random_op_npu.cc b/paddle/fluid/operators/gaussian_random_op_npu.cc old mode 100755 new mode 100644 index b5ca26edf8fae..0a64db1823d98 --- a/paddle/fluid/operators/gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/gaussian_random_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -35,7 +36,7 @@ class NPUGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::normal_distribution dist(mean, std); diff --git a/paddle/fluid/operators/graph_send_recv_op.cu b/paddle/fluid/operators/graph_send_recv_op.cu index 446ad2d97a7fb..f43d31814ac38 100644 --- a/paddle/fluid/operators/graph_send_recv_op.cu +++ b/paddle/fluid/operators/graph_send_recv_op.cu @@ -360,7 +360,7 @@ class GraphSendRecvOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Src_index"); auto* dst_index = ctx.Input("Dst_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvOpCUDAKernelLaunchHelper( @@ -383,7 +383,7 @@ class GraphSendRecvGradOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Dst_index"); auto* dst_index = ctx.Input("Src_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvGradOpCUDAKernelLaunchHelper( diff --git a/paddle/fluid/operators/graph_send_recv_op.h b/paddle/fluid/operators/graph_send_recv_op.h index 1c7ea74be2ff4..8d8111e0ee845 100644 --- a/paddle/fluid/operators/graph_send_recv_op.h +++ b/paddle/fluid/operators/graph_send_recv_op.h @@ -249,7 +249,7 @@ class GraphSendRecvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Src_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvOpKernelLaunchHelper(ctx, *src_index); @@ -270,7 +270,7 @@ class GraphSendRecvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Dst_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvGradOpKernelLaunchHelper(ctx, diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index 2c7fd8f4173ea..2d284fb516e62 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -204,7 +204,8 @@ class GroupNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL( t, platform::errors::InvalidArgument( "Input(Y@GRAD) Tensor of GroupNormGradOp should not be null")); - return framework::OpKernelType(t->type(), ctx.GetPlace()); + return framework::OpKernelType(framework::TransToProtoVarType(t->dtype()), + ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/increment_op_npu.cc b/paddle/fluid/operators/increment_op_npu.cc index c88dd1a16b657..1c7c8a19110bc 100644 --- 
a/paddle/fluid/operators/increment_op_npu.cc +++ b/paddle/fluid/operators/increment_op_npu.cc @@ -27,7 +27,7 @@ class IncrementalNPUKernel : public framework::OpKernel { float step = context.Attr("step"); out_tensor->mutable_data(context.GetPlace()); - Tensor step_tensor(x_tensor->type()); + Tensor step_tensor(x_tensor->dtype()); step_tensor.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&step_tensor, static_cast(step)); diff --git a/paddle/fluid/operators/index_sample_op.cu b/paddle/fluid/operators/index_sample_op.cu index e145c555dc552..5e2ab922850ff 100644 --- a/paddle/fluid/operators/index_sample_op.cu +++ b/paddle/fluid/operators/index_sample_op.cu @@ -85,7 +85,7 @@ class IndexSampleKernel auto* index = ctx.Input("Index"); auto* output = ctx.Output("Out"); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -144,7 +144,7 @@ class IndexSampleGradKernel const auto* output_grad_data = output_grad->data(); auto* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_sample_op.h b/paddle/fluid/operators/index_sample_op.h index ab03c17fd1a6f..a92b205f3f761 100644 --- a/paddle/fluid/operators/index_sample_op.h +++ b/paddle/fluid/operators/index_sample_op.h @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include #include #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -93,7 +94,8 @@ class IndexSampleKernel : public framework::OpKernel { auto *out_var = ctx.OutputVar("Out"); auto *out_tensor = out_var->GetMutable(); - const auto &index_type = index_tensor.type(); + const auto &index_type = + framework::TransToProtoVarType(index_tensor.dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -169,7 +171,8 @@ class IndexSampleGradKernel : public framework::OpKernel { auto &out_grad_tensor = out_grad_var->Get(); auto *x_grad_tensor = x_grad_var->GetMutable(); - const auto &index_type = index_tensor.type(); + const auto &index_type = + framework::TransToProtoVarType(index_tensor.dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc index 9253f77345dcb..193a27e07e58d 100644 --- a/paddle/fluid/operators/index_sample_op_npu.cc +++ b/paddle/fluid/operators/index_sample_op_npu.cc @@ -60,7 +60,7 @@ class IndexSampleNPUKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { IndexSampleGather(dev_ctx, index, input, out); } else { @@ -113,7 +113,7 @@ class IndexSampleGradNPUKernel : public framework::OpKernel { ctx.Output(framework::GradVarName("X")); x_grad->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { IndexSampleGradScatter(dev_ctx, index, out_grad, x_grad); } else { diff --git a/paddle/fluid/operators/index_select_op.cu b/paddle/fluid/operators/index_select_op.cu index acf959896f949..ad0f13e9c2c47 100644 --- a/paddle/fluid/operators/index_select_op.cu +++ b/paddle/fluid/operators/index_select_op.cu @@ -84,7 +84,7 @@ class IndexSelectCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -142,7 +142,7 @@ class IndexSelectGradCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index b157f775d50eb..5fee9bfd8547c 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -103,7 +103,7 @@ class IndexSelectKernel : public framework::OpKernel { if (dim < 0) { dim += inputs.dims().size(); 
} - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -211,7 +211,7 @@ class IndexSelectGradKernel : public framework::OpKernel { if (dim < 0) { dim += out_grad->dims().size(); } - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index 617bf242345c4..bce7a3c1caae3 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -65,7 +65,8 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { } Tensor casted_index; - if (index->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) != + framework::proto::VarType::INT32) { casted_index.mutable_data(index->dims(), ctx.GetPlace()); const auto& cast_runner = NpuOpRunner("Cast", {*index}, {casted_index}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 7a112292c8fc5..e0779249c41ad 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -36,18 +36,23 @@ class InplaceABNOp : public paddle::operators::BatchNormOp { if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Mean")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument( "Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), + platform::errors::InvalidArgument( + "Variance input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; @@ -117,7 +122,8 @@ class InplaceABNGradOp : public paddle::operators::BatchNormGradOp { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { const auto* var = ctx.InputVar(framework::GradVarName("Y")); - auto input_data_type = ctx.Input("Y")->type(); + auto input_data_type = + framework::TransToProtoVarType(ctx.Input("Y")->dtype()); if (var == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( "can't find gradient variable of Y")); diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index 8c650c6437632..5d1fb69811829 100644 
--- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -115,12 +115,14 @@ framework::OpKernelType InstanceNormOp::GetExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(in_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(in_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); } diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc index 8c76961cd6a3c..bf29c2aabb801 100644 --- a/paddle/fluid/operators/interpolate_v2_op_npu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc @@ -65,7 +65,8 @@ struct InterpolateFunction { runner.Run(stream); } void Cast(const Tensor* x, Tensor* y) { - auto dst_dtype = ConvertToNpuDtype(y->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(y->dtype())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); @@ -146,7 +147,7 @@ struct InterpolateFunction { template <> void InterpolateFunction::Arange(int n, Tensor* x) { - Tensor x_fp32(framework::proto::VarType::FP32); + Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); diff --git a/paddle/fluid/operators/ipu/ipu_runtime_op.cc b/paddle/fluid/operators/ipu/ipu_runtime_op.cc index 3b6982d4b2b8e..8cbf1a018a570 100644 --- a/paddle/fluid/operators/ipu/ipu_runtime_op.cc +++ b/paddle/fluid/operators/ipu/ipu_runtime_op.cc @@ -46,7 +46,7 @@ class IpuRuntimeOp : public framework::OperatorBase { for (size_t i = 0; i < outputs.size(); ++i) { auto* out = outputs[i]; if (out->dims().size() == 0) { - auto tensor_dtype = out->type(); + auto tensor_dtype = framework::TransToProtoVarType(out->dtype()); auto sizeof_dtype = framework::SizeOfType(tensor_dtype); int64_t dim = out->memory_size() / sizeof_dtype; out->Resize({dim}); diff --git a/paddle/fluid/operators/isclose_op.h b/paddle/fluid/operators/isclose_op.h index 7f5052c1e6655..cde5d2afbf009 100644 --- a/paddle/fluid/operators/isclose_op.h +++ b/paddle/fluid/operators/isclose_op.h @@ -59,10 +59,13 @@ class IscloseKernel : public framework::OpKernel { rtol->numel(), 1, platform::errors::InvalidArgument( "Input(Rtol) size must be 1, but get %d.", rtol->numel())); - PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Rtol) type must be double, but get %s.", - framework::DataTypeToString(rtol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(rtol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Rtol) type must be double, but get %s.", + framework::DataTypeToString( + framework::TransToProtoVarType(rtol->dtype())))); rtol_v = get_tensor_value(dev_ctx, *rtol); } if (ctx.HasInput("Atol")) { @@ -71,10 +74,13 @@ class IscloseKernel : public framework::OpKernel { atol->numel(), 1, platform::errors::InvalidArgument( "Input(Atol) size must be 1, but get %d", atol->numel())); - 
PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Atol) type must be double, but get %s", - framework::DataTypeToString(atol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(atol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Atol) type must be double, but get %s", + framework::DataTypeToString( + framework::TransToProtoVarType(atol->dtype())))); atol_v = get_tensor_value(dev_ctx, *atol); } diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 7fdb0599ebfd1..247fcb6814f24 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -54,9 +54,11 @@ class OverflowOp : public framework::OperatorWithKernel { int dtype = -1; auto *x_var = ctx.InputVar("X"); if (x_var->IsType()) { - dtype = x_var->Get().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().type()); } else if (x_var->IsType()) { - dtype = x_var->Get().value().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().value().type()); } else { PADDLE_ENFORCE_EQ( true, false, diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc index 05a394e682d67..1085bcd72707c 100644 --- a/paddle/fluid/operators/isfinite_v2_op.cc +++ b/paddle/fluid/operators/isfinite_v2_op.cc @@ -61,9 +61,11 @@ class OverflowV2Op : public framework::OperatorWithKernel { int dtype = -1; auto *x_var = ctx.InputVar("X"); if (x_var->IsType()) { - dtype = x_var->Get().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().dtype()); } else if (x_var->IsType()) { - dtype = x_var->Get().value().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().value().dtype()); } else { PADDLE_THROW(plat::errors::InvalidArgument( "Cannot find the input data type by all input data")); diff --git a/paddle/fluid/operators/kron_op.cc b/paddle/fluid/operators/kron_op.cc index 308330313a976..3168c0c43b640 100644 --- a/paddle/fluid/operators/kron_op.cc +++ b/paddle/fluid/operators/kron_op.cc @@ -62,8 +62,9 @@ class KronOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -139,8 +140,9 @@ class KronGradOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/layer_norm_op.cu b/paddle/fluid/operators/layer_norm_op.cu index ef4f0c6ba7063..38b4b1b24b69f 100644 --- a/paddle/fluid/operators/layer_norm_op.cu +++ b/paddle/fluid/operators/layer_norm_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" #include "paddle/fluid/operators/layer_norm_op.h" #include "paddle/fluid/platform/float16.h" @@ -67,18 +68,22 @@ class LayerNormKernel auto *void_scale_data = (scale == nullptr ? nullptr : scale->data()); auto *void_bias_data = (bias == nullptr ? nullptr : bias->data()); - framework::proto::VarType::Type x_dtype = x->type(); + framework::proto::VarType::Type x_dtype = + framework::TransToProtoVarType(x->dtype()); framework::proto::VarType::Type scale_bias_dtype; if (void_scale_data != nullptr) { - scale_bias_dtype = scale->type(); + scale_bias_dtype = framework::TransToProtoVarType(scale->dtype()); if (void_bias_data != nullptr) { - PADDLE_ENFORCE_EQ(scale_bias_dtype, bias->type(), + PADDLE_ENFORCE_EQ(scale_bias_dtype, + framework::TransToProtoVarType(bias->dtype()), platform::errors::InvalidArgument( "Thie Scale and Bias of layer_norm op " "should have the same data type.")); } } else { - scale_bias_dtype = (void_bias_data != nullptr ? bias->type() : x_dtype); + scale_bias_dtype = (void_bias_data != nullptr + ? framework::TransToProtoVarType(bias->dtype()) + : x_dtype); } bool is_scale_bias_same_dtype_with_x = x_dtype == scale_bias_dtype; @@ -193,16 +198,17 @@ class LayerNormGradKernel auto *d_x_data = (d_x == nullptr ? nullptr : d_x->mutable_data(ctx.GetPlace())); - framework::proto::VarType::Type x_dtype = x->type(); + framework::proto::VarType::Type x_dtype = + framework::TransToProtoVarType(x->dtype()); framework::proto::VarType::Type scale_bias_dtype; if (scale != nullptr) { - scale_bias_dtype = scale->type(); + scale_bias_dtype = framework::TransToProtoVarType(scale->dtype()); } else { // FIXME(zengjinle): do not find a better way to get the right // data type of the d_scale and d_bias if scale == nullptr. 
auto *bias = ctx.Input("Bias"); if (bias != nullptr) { - scale_bias_dtype = bias->saved_type(); + scale_bias_dtype = framework::TransToProtoVarType(bias->dtype()); } else { scale_bias_dtype = x_dtype; } diff --git a/paddle/fluid/operators/layer_norm_op_npu.cc b/paddle/fluid/operators/layer_norm_op_npu.cc index 8cf64b0f8cf6d..13243778cd3e7 100644 --- a/paddle/fluid/operators/layer_norm_op_npu.cc +++ b/paddle/fluid/operators/layer_norm_op_npu.cc @@ -105,11 +105,14 @@ class LayerNormNPUKernel : public framework::OpKernel { // cast scale from LayerNormParamType to T if needed Tensor cast_scale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - scale->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32) { cast_scale.Resize(scale->dims()); cast_scale.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_scale = NpuOpRunner("Cast", {*scale}, {cast_scale}, {{"dst_type", static_cast(dst_dtype)}}); @@ -120,11 +123,14 @@ class LayerNormNPUKernel : public framework::OpKernel { // cast bias from LayerNormParamType to T if needed Tensor cast_bias(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - bias->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32) { cast_bias.Resize(bias->dims()); cast_bias.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_bias = NpuOpRunner("Cast", {*bias}, {cast_bias}, {{"dst_type", static_cast(dst_dtype)}}); @@ -138,9 +144,12 @@ class LayerNormNPUKernel : public framework::OpKernel { // mean should be of U type Tensor* tmp_mean = mean; Tensor cast_mean(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (scale->type() == framework::proto::VarType::FP32 || - bias->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32)) { cast_mean.Resize(mean->dims()); cast_mean.mutable_data(ctx.GetPlace()); tmp_mean = &cast_mean; @@ -152,9 +161,12 @@ class LayerNormNPUKernel : public framework::OpKernel { // same for variance Tensor* tmp_variance = variance; Tensor cast_variance(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (scale->type() == framework::proto::VarType::FP32 || - bias->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32)) { cast_variance.Resize(variance->dims()); cast_variance.mutable_data(ctx.GetPlace()); tmp_variance = &cast_variance; @@ -171,18 +183,24 @@ class LayerNormNPUKernel : public framework::OpKernel { runner.Run(stream); // cast back from FP16 to FP32 - if (x->type() == framework::proto::VarType::FP16 
&& - mean->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(mean->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(mean->type())); const auto& runner_cast_mean = NpuOpRunner("Cast", {*tmp_mean}, {*mean}, {{"dst_type", static_cast(dst_dtype)}}); runner_cast_mean.Run(stream); } // same for variance - if (x->type() == framework::proto::VarType::FP16 && - variance->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(variance->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(variance->type())); const auto& runner_cast_variance = NpuOpRunner("Cast", {*tmp_variance}, {*variance}, {{"dst_type", static_cast(dst_dtype)}}); @@ -260,11 +278,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast scale from LayerNormParamType to T if needed Tensor cast_scale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - scale->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32) { cast_scale.Resize(scale->dims()); cast_scale.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_scale = NpuOpRunner("Cast", {*scale}, {cast_scale}, {{"dst_type", static_cast(dst_dtype)}}); @@ -275,11 +296,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast mean from LayerNormParamType to T if needed Tensor cast_mean(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - mean->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32) { cast_mean.Resize(mean->dims()); cast_mean.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_mean = NpuOpRunner("Cast", {*mean}, {cast_mean}, {{"dst_type", static_cast(dst_dtype)}}); @@ -290,11 +314,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast variance from LayerNormParamType to T if needed Tensor cast_variance(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - variance->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32) { cast_variance.Resize(variance->dims()); cast_variance.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_variance = NpuOpRunner("Cast", {*variance}, {cast_variance}, {{"dst_type", static_cast(dst_dtype)}}); @@ -318,9 +345,12 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // dscale should be of U 
type Tensor* tmp_dscale = dscale; Tensor cast_dscale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (mean->type() == framework::proto::VarType::FP32 || - variance->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32)) { cast_dscale.Resize(dscale->dims()); cast_dscale.mutable_data(ctx.GetPlace()); tmp_dscale = &cast_dscale; @@ -332,9 +362,12 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // same for dbias Tensor* tmp_dbias = dbias; Tensor cast_dbias(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (mean->type() == framework::proto::VarType::FP32 || - variance->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32)) { cast_dbias.Resize(dbias->dims()); cast_dbias.mutable_data(ctx.GetPlace()); tmp_dbias = &cast_dbias; @@ -349,18 +382,24 @@ class LayerNormGradNPUKernel : public framework::OpKernel { runner.Run(stream); // cast back from FP16 to FP32 - if (x->type() == framework::proto::VarType::FP16 && - dscale->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(dscale->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(dscale->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dscale->type())); const auto& runner_cast_dscale = NpuOpRunner("Cast", {*tmp_dscale}, {*dscale}, {{"dst_type", static_cast(dst_dtype)}}); runner_cast_dscale.Run(stream); } // same for dbias - if (x->type() == framework::proto::VarType::FP16 && - dbias->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(dbias->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(dbias->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dbias->type())); const auto& runner_cast_dbias = NpuOpRunner("Cast", {*tmp_dbias}, {*dbias}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index 4bf2a7cb372cb..885d8de0d93b2 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -54,10 +54,10 @@ class CUDALinspaceKernel : public framework::OpKernel { Tensor start_t; Tensor stop_t; - auto start_dtype = - framework::OpKernelType(pre_start->type(), context.GetPlace()); - auto stop_dtype = - framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto start_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_start->dtype()), context.GetPlace()); + auto stop_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_stop->dtype()), context.GetPlace()); auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); diff --git 
a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 7e384f4b64bc3..e5f2c9e883cf8 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -36,10 +36,10 @@ class CPULinspaceKernel : public framework::OpKernel { Tensor start_t; Tensor stop_t; - auto start_dtype = - framework::OpKernelType(pre_start->type(), context.GetPlace()); - auto stop_dtype = - framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto start_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_start->dtype()), context.GetPlace()); + auto stop_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_stop->dtype()), context.GetPlace()); auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h index 3a97b1cd84819..f344188664b81 100644 --- a/paddle/fluid/operators/load_combine_op.h +++ b/paddle/fluid/operators/load_combine_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" @@ -108,7 +109,7 @@ class LoadCombineOpKernel : public framework::OpKernel { // Get data from fin to tensor paddle::framework::DeserializeFromStream(*buffer, tensor, dev_ctx); - auto in_dtype = tensor->type(); + auto in_dtype = framework::TransToProtoVarType(tensor->dtype()); auto out_dtype = load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; diff --git a/paddle/fluid/operators/load_op.h b/paddle/fluid/operators/load_op.h index 89ad4325a5a53..52ef4d541dff0 100644 --- a/paddle/fluid/operators/load_op.h +++ b/paddle/fluid/operators/load_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" @@ -82,7 +83,7 @@ class LoadOpKernel : public framework::OpKernel { } auto load_as_fp16 = ctx.Attr("load_as_fp16"); - auto in_dtype = tensor->type(); + auto in_dtype = framework::TransToProtoVarType(tensor->dtype()); auto out_dtype = load_as_fp16 ? 
framework::proto::VarType::FP16 : in_dtype; if (in_dtype != out_dtype) { diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index 5f39a9afa94ba..055952f175ca6 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -78,7 +78,8 @@ struct LoDTensorToArrayFunctor : public boost::static_visitor { LoDTensorToArrayFunctorImpl func; func.prev_functor_ = this; func.dev_ctx_ = dev_ctx; - framework::VisitDataType(input_.type(), func); + framework::VisitDataType(framework::TransToProtoVarType(input_.dtype()), + func); } }; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 91b7f91c8e3bc..c0c6809656587 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -87,7 +87,8 @@ class LookupTableKernel : public framework::OpKernel { int64_t row_width = table_t.value().dims()[1]; const auto *table = table_t.value().data(); auto *output = output_t->mutable_data(context.GetPlace()); - auto input_data_type = table_t.value().type(); + auto input_data_type = + framework::TransToProtoVarType(table_t.value().dtype()); for (int64_t i = 0; i < ids_numel; ++i) { if (padding_idx != kNoPadding && ids[i] == padding_idx) { memset(output + i * row_width, 0, row_width * sizeof(T)); diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index d182385cce9d2..e327c783ea694 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -109,7 +109,8 @@ class LookupTableV2CUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids_t = context.Input("Ids"); LookupTableV2CUDAFunctor functor(context, ids_t); - framework::VisitIntDataType(ids_t->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), + functor); } }; @@ -216,7 +217,8 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids_t = context.Input("Ids"); LookupTableV2GradCUDAFunctor functor(context, ids_t); - framework::VisitIntDataType(ids_t->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), + functor); } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index bc433b1e10f3e..88149f53ac7e1 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -100,7 +100,8 @@ struct LookupTableV2CPUFunctor { int64_t row_width = table_t.value().dims()[1]; const auto *table = table_t.value().template data(); auto *output = output_t->template mutable_data(context_.GetPlace()); - auto input_data_type = table_t.value().type(); + auto input_data_type = + framework::TransToProtoVarType(table_t.value().dtype()); for (int64_t i = 0; i < ids_numel; ++i) { if (padding_idx != kNoPadding && ids[i] == padding_idx) { @@ -143,7 +144,8 @@ class LookupTableV2Kernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids = context.Input("Ids"); LookupTableV2CPUFunctor functor(context, ids); - framework::VisitIntDataType(ids->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), + functor); } }; @@ -257,7 +259,8 @@ class 
LookupTableV2GradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids = context.Input("Ids"); LookupTableV2GradCPUFunctor functor(context, ids); - framework::VisitIntDataType(ids->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), + functor); } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index 0dfb0cf183086..4b3b47d352c91 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -134,7 +134,8 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel { runner_scatter.Run(stream); } else { Tensor casted_ids_t; - if (ids_t->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(ids_t->dtype()) != + framework::proto::VarType::INT32) { casted_ids_t.mutable_data(ids_t->dims(), ctx.GetPlace()); const auto &cast_runner = NpuOpRunner("Cast", {*ids_t}, {casted_ids_t}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index dd0cff5cc5f44..4819bd7251832 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -177,7 +177,7 @@ class LstsqCPUKernel : public framework::OpKernel { // "rwork" only used for complex inputs and "gelsy/gelsd/gelss" drivers Tensor rwork; ValueType* rwork_data = nullptr; - if (framework::IsComplexType(x.type()) && + if (framework::IsComplexType(framework::TransToProtoVarType(x.dtype())) && driver != LapackDriverType::Gels) { int rwork_len = 0; if (driver == LapackDriverType::Gelsy) { diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index b3d79122bcd83..11174540cb0cd 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -38,7 +38,7 @@ void SetValueCompute(const framework::ExecutionContext& ctx, std::vector decrease_axes = {}; std::vector none_axes = {}; - auto dtype = in->type(); + auto dtype = framework::TransToProtoVarType(in->dtype()); auto in_dims = in->dims(); CheckAndUpdateSliceAttrs(in_dims, axes, starts, ends, &steps); @@ -88,7 +88,8 @@ void SetValueCompute(const framework::ExecutionContext& ctx, // set_value is what we want. 
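Editor's note: the lu_op.h hunk just below uses both directions of the new conversion utilities: TransToProtoVarType when a legacy consumer (slice/elementwise dispatch, OpKernelType, NCCL, NPU/MKLDNN helpers) still wants proto::VarType, and TransToPtenDataType when a temporary Tensor has to be constructed from that proto type. A minimal round-trip sketch, assuming only the two helpers shown in the patch; the function name RoundTripExample is illustrative:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

framework::Tensor RoundTripExample(const framework::Tensor& in) {
  // pten dtype -> legacy proto type, e.g. for kernel-type or visitor dispatch.
  auto proto_type = framework::TransToProtoVarType(in.dtype());
  // legacy proto type -> pten dtype, e.g. to build a temporary tensor with
  // the same element type as `in`.
  framework::Tensor tmp(framework::TransToPtenDataType(proto_type));
  tmp.Resize(in.dims());
  return tmp;
}

}  // namespace operators
}  // namespace paddle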
paddle::framework::TensorCopy(*in, place, out); - Tensor slice_tensor(dtype), pad_tensor(dtype); + Tensor slice_tensor(framework::TransToPtenDataType(dtype)), + pad_tensor(framework::TransToPtenDataType(dtype)); slice_tensor.mutable_data(slice_dims, place); pad_tensor.mutable_data(in_dims, place); @@ -146,7 +147,7 @@ void SetValueCompute(const framework::ExecutionContext& ctx, ElementwiseComputeEx, DeviceContext, T>( ctx, &slice_tensor, value_tensor, -1, SubFunctor(), &slice_tensor); } else { - Tensor value_t(dtype); + Tensor value_t(framework::TransToPtenDataType(dtype)); auto value_dims = framework::make_ddim(shape); CheckIsDimsMatch(slice_dims_for_assign, value_dims); diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index a59909644aa25..386d93ce13219 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -72,8 +72,9 @@ void GetClassInterval(const gpuStream_t& stream, const platform::Place& place, PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device.numel(), - platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum, - comm->comm(), calcu_stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(num_classes_per_device.dtype())), + ncclSum, comm->comm(), calcu_stream)); auto class_interval_ptr = class_interval->mutable_data({nranks + 1}, place); @@ -250,7 +251,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); // copy logits to softmax variable since we can't modify logits, // and it also be used when calculate grad @@ -306,8 +307,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( logits_max_buff, logits_max_buff, logits_max.numel(), - platform::ToNCCLDataType(logits_max.type()), ncclMax, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(logits_max.dtype())), + ncclMax, comm->comm(), stream)); } #endif @@ -328,8 +330,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( sum_exp_logits_buff, sum_exp_logits_buff, sum_exp_logits.numel(), - platform::ToNCCLDataType(sum_exp_logits.type()), ncclSum, - comm->comm(), stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(sum_exp_logits.dtype())), + ncclSum, comm->comm(), stream)); } #endif @@ -361,8 +364,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( loss_ptr, loss_ptr, loss->numel(), - platform::ToNCCLDataType(loss->type()), ncclSum, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(loss->dtype())), + ncclSum, comm->comm(), stream)); } #endif } @@ -409,7 +413,7 @@ class MarginCrossEntropyGradCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N * D); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); Tensor class_interval; GetClassInterval(dev_ctx.stream(), context.GetPlace(), diff --git 
a/paddle/fluid/operators/math/beam_search_npu.cc b/paddle/fluid/operators/math/beam_search_npu.cc index 2d5a3dae33b32..96f490c597af0 100644 --- a/paddle/fluid/operators/math/beam_search_npu.cc +++ b/paddle/fluid/operators/math/beam_search_npu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/beam_search.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" +#include "paddle/pten/common/data_type.h" namespace pten { class DenseTensor; @@ -69,11 +70,13 @@ class BeamSearchFunctor { // Step1: Define Tensors and Preprocess the situation that pre_id == end_id // cast ids and pre_ids from int to float32 - Tensor ids_int32(framework::proto::VarType::INT32); - if (ids->type() != framework::proto::VarType::INT32) { + Tensor ids_int32(experimental::DataType::INT32); + if (framework::TransToProtoVarType(ids->dtype()) != + framework::proto::VarType::INT32) { ids_int32.Resize(ids->dims()); ids_int32.mutable_data(ctx.GetPlace()); - auto dst_dtype_ids_int32 = ConvertToNpuDtype(ids_int32.type()); + auto dst_dtype_ids_int32 = + ConvertToNpuDtype(framework::TransToProtoVarType(ids_int32.dtype())); const auto& runner_ids_int32 = NpuOpRunner("Cast", {*ids}, {ids_int32}, {{"dst_type", static_cast(dst_dtype_ids_int32)}}); @@ -82,11 +85,13 @@ class BeamSearchFunctor { ids_int32.ShareDataWith(*ids); } - Tensor pre_ids_int32(framework::proto::VarType::INT32); - if (pre_ids->type() != framework::proto::VarType::INT32) { + Tensor pre_ids_int32(experimental::DataType::INT32); + if (framework::TransToProtoVarType(pre_ids->dtype()) != + framework::proto::VarType::INT32) { pre_ids_int32.Resize(pre_ids->dims()); pre_ids_int32.mutable_data(ctx.GetPlace()); - auto dst_dtype_pre_ids_int32 = ConvertToNpuDtype(pre_ids_int32.type()); + auto dst_dtype_pre_ids_int32 = ConvertToNpuDtype( + framework::TransToProtoVarType(pre_ids_int32.dtype())); const auto& runner_pre_ids_int32 = NpuOpRunner( "Cast", {*pre_ids}, {pre_ids_int32}, {{"dst_type", static_cast(dst_dtype_pre_ids_int32)}}); @@ -95,7 +100,7 @@ class BeamSearchFunctor { pre_ids_int32.ShareDataWith(*pre_ids); } - Tensor expand_pre_ids(pre_ids_int32.type()); + Tensor expand_pre_ids(pre_ids_int32.dtype()); expand_pre_ids.Resize(framework::make_ddim({batch_size, seq_width})); expand_pre_ids.mutable_data(place); const auto& runner_tile_pre_ids = @@ -104,7 +109,7 @@ class BeamSearchFunctor { runner_tile_pre_ids.Run(stream); expand_pre_ids.Resize(ids_int32.dims()); - Tensor expand_pre_scores(pre_scores->type()); + Tensor expand_pre_scores(pre_scores->dtype()); expand_pre_scores.Resize(framework::make_ddim({batch_size, seq_width})); expand_pre_scores.mutable_data(place); const auto& runner_tile_pre_scores = @@ -114,11 +119,11 @@ class BeamSearchFunctor { expand_pre_scores.Resize(scores->dims()); // End_id Tensors - Tensor end_id_tmp_tensor(framework::proto::VarType::INT32); + Tensor end_id_tmp_tensor(experimental::DataType::INT32); end_id_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&end_id_tmp_tensor, end_id); - Tensor end_id_tensors(ids_int32.type()); + Tensor end_id_tensors(ids_int32.dtype()); end_id_tensors.mutable_data(ids_int32.dims(), place); const auto& runner_fill_end_id = NpuOpRunner("FillD", {end_id_tmp_tensor}, {end_id_tensors}, @@ -126,7 +131,7 @@ class BeamSearchFunctor { runner_fill_end_id.Run(stream); // whether expand_pre_ids == end_ids? 
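Editor's note: the beam_search_npu.cc hunks that follow change every staging tensor from the proto enum constructor to the pten DataType constructor (and add the paddle/pten/common/data_type.h include that makes experimental::DataType visible). A small sketch of the new form, assuming the constructor shown in the hunks; MakeInt32Staging is an illustrative name:

#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/common/data_type.h"

namespace paddle {
namespace operators {

// Before this patch: Tensor staging(framework::proto::VarType::INT32);
// After this patch the constructor takes the pten DataType directly.
framework::Tensor MakeInt32Staging(const framework::DDim& dims) {
  framework::Tensor staging(experimental::DataType::INT32);
  staging.Resize(dims);
  return staging;
}

}  // namespace operators
}  // namespace paddle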
- Tensor equal_end_ids(framework::proto::VarType::BOOL); + Tensor equal_end_ids(experimental::DataType::BOOL); equal_end_ids.mutable_data(ids_int32.dims(), place); const auto& runner_equal_end_ids = NpuOpRunner( "Equal", {expand_pre_ids, end_id_tensors}, {equal_end_ids}, {}); @@ -136,11 +141,11 @@ class BeamSearchFunctor { // [[False, True, True, True, ...], // [False, True, True, True, ...], // ...] - Tensor false_tmp_tensor(framework::proto::VarType::INT32); + Tensor false_tmp_tensor(experimental::DataType::INT32); false_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&false_tmp_tensor, static_cast(false)); - Tensor first_pos_false_tensors(framework::proto::VarType::INT32); + Tensor first_pos_false_tensors(experimental::DataType::INT32); first_pos_false_tensors.Resize(framework::make_ddim({batch_size, 1})); first_pos_false_tensors.mutable_data(place); std::vector fill_dims = {batch_size, 1}; @@ -149,16 +154,16 @@ class BeamSearchFunctor { "FillD", {false_tmp_tensor}, {first_pos_false_tensors}, fill_attr); runner_fill_false_tensors.Run(stream); - Tensor pos_tensors(framework::proto::VarType::INT32); + Tensor pos_tensors(experimental::DataType::INT32); if (seq_width > 1) { pos_tensors.Resize(framework::make_ddim({batch_size, seq_width})); pos_tensors.mutable_data(place); - Tensor true_tmp_tensor(framework::proto::VarType::INT32); + Tensor true_tmp_tensor(experimental::DataType::INT32); true_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&true_tmp_tensor, static_cast(true)); - Tensor second_pos_true_tensors(framework::proto::VarType::INT32); + Tensor second_pos_true_tensors(experimental::DataType::INT32); second_pos_true_tensors.Resize( framework::make_ddim({batch_size, seq_width - 1})); second_pos_true_tensors.mutable_data(place); @@ -182,10 +187,11 @@ class BeamSearchFunctor { pos_tensors.ShareDataWith(first_pos_false_tensors); } - Tensor cast_pos_tensors_bool(framework::proto::VarType::BOOL); + Tensor cast_pos_tensors_bool(experimental::DataType::BOOL); cast_pos_tensors_bool.Resize(pos_tensors.dims()); cast_pos_tensors_bool.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(cast_pos_tensors_bool.type()); + auto dst_dtype = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_pos_tensors_bool.type())); const auto& runner_cast_pos_tensors = NpuOpRunner("Cast", {pos_tensors}, {cast_pos_tensors_bool}, {{"dst_type", static_cast(dst_dtype)}}); @@ -193,7 +199,7 @@ class BeamSearchFunctor { // if pre_ids == end_ids, save only one score, and others become -inf // construct pre_ids == end_ids and save only one score - Tensor save_one_end_score(framework::proto::VarType::BOOL); + Tensor save_one_end_score(experimental::DataType::BOOL); save_one_end_score.mutable_data(ids_int32.dims(), place); const auto& runner_logical_and = NpuOpRunner("LogicalAnd", {equal_end_ids, cast_pos_tensors_bool}, @@ -202,13 +208,13 @@ class BeamSearchFunctor { // if save_one_end_score is True, set score to -inf // define -Inf Tensors - Tensor ninf_tmp_tensor(scores->type()); + Tensor ninf_tmp_tensor(scores->dtype()); ninf_tmp_tensor.mutable_data({1}, ctx.GetPlace()); float ninf_value = static_cast(-std::numeric_limits::infinity()); FillNpuTensorWithConstant(&ninf_tmp_tensor, ninf_value); - Tensor ninf_tensors(scores->type()); + Tensor ninf_tensors(scores->dtype()); ninf_tensors.mutable_data(scores->dims(), place); const auto& runner_fill_ninf = NpuOpRunner("FillD", {ninf_tmp_tensor}, {ninf_tensors}, @@ -218,22 +224,22 @@ class BeamSearchFunctor { // 
Step2: calculate topk scores // get scores used in topk op - Tensor tmp_scores(scores->type()); + Tensor tmp_scores(scores->dtype()); tmp_scores.mutable_data(scores->dims(), place); if (!is_accumulated) { // if pre_id == end_id, cal_scores = pre_score, and id = end_id // else, cal_score = pre_score + log(score) // calculate log(scores) - Tensor log_scores(scores->type()); + Tensor log_scores(scores->dtype()); log_scores.mutable_data(scores->dims(), place); - Tensor one(scores->type()); + Tensor one(scores->dtype()); one.mutable_data(scores->dims(), place); const auto& runner_one = NpuOpRunner("OnesLike", {*scores}, {one}, {}); runner_one.Run(stream); - Tensor sub(scores->type()); + Tensor sub(scores->dtype()); sub.mutable_data(scores->dims(), place); const auto& runner_sub = NpuOpRunner("Sub", {*scores, one}, {sub}, {}); runner_sub.Run(stream); @@ -261,7 +267,7 @@ class BeamSearchFunctor { } // if pre_ids == end_ids, save only one score, and others become -inf - Tensor cal_scores(scores->type()); + Tensor cal_scores(scores->dtype()); cal_scores.mutable_data(scores->dims(), place); const auto& runner_select_inf_score = NpuOpRunner("Select", {save_one_end_score, ninf_tensors, tmp_scores}, @@ -273,12 +279,12 @@ class BeamSearchFunctor { cal_scores.Resize( framework::make_ddim({num_seqs, real_beam_size * seq_width})); - Tensor topk_scores(scores->type()); + Tensor topk_scores(scores->dtype()); topk_scores.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); topk_scores.mutable_data(ctx.GetPlace()); - Tensor tmp_indices(framework::proto::VarType::INT32); + Tensor tmp_indices(experimental::DataType::INT32); tmp_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); tmp_indices.mutable_data(ctx.GetPlace()); @@ -296,21 +302,21 @@ class BeamSearchFunctor { runner_topk.Run(stream); // cast tmp_indices from int to float32 for Sort op - Tensor cast_tmp_indices(framework::proto::VarType::FP32); + Tensor cast_tmp_indices(experimental::DataType::FLOAT32); cast_tmp_indices.Resize(tmp_indices.dims()); cast_tmp_indices.mutable_data(ctx.GetPlace()); - auto dst_dtype_tmp_indices_fp32 = - ConvertToNpuDtype(cast_tmp_indices.type()); + auto dst_dtype_tmp_indices_fp32 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_tmp_indices.type())); const auto& runner_cast_tmp_indices = NpuOpRunner( "Cast", {tmp_indices}, {cast_tmp_indices}, {{"dst_type", static_cast(dst_dtype_tmp_indices_fp32)}}); runner_cast_tmp_indices.Run(stream); // sort tmp_indices - Tensor sorted_tmp_indices(framework::proto::VarType::FP32); + Tensor sorted_tmp_indices(experimental::DataType::FLOAT32); sorted_tmp_indices.Resize(tmp_indices.dims()); sorted_tmp_indices.mutable_data(ctx.GetPlace()); - Tensor sorted_score_indices(framework::proto::VarType::INT32); + Tensor sorted_score_indices(experimental::DataType::INT32); sorted_score_indices.Resize(tmp_indices.dims()); sorted_score_indices.mutable_data(ctx.GetPlace()); const auto& runner_sort_tmp_indices = NpuOpRunner( @@ -319,11 +325,11 @@ class BeamSearchFunctor { runner_sort_tmp_indices.Run(stream); // cast sorted_tmp_indices from float32 to int - Tensor cast_sort_tmp_indices(framework::proto::VarType::INT32); + Tensor cast_sort_tmp_indices(experimental::DataType::INT32); cast_sort_tmp_indices.Resize(sorted_tmp_indices.dims()); cast_sort_tmp_indices.mutable_data(ctx.GetPlace()); - auto dst_dtype_tmp_indices_int32 = - ConvertToNpuDtype(cast_sort_tmp_indices.type()); + auto dst_dtype_tmp_indices_int32 = ConvertToNpuDtype( + 
framework::TransToProtoVarType(cast_sort_tmp_indices.type())); const auto& runner_cast_sort_tmp_indices = NpuOpRunner( "Cast", {sorted_tmp_indices}, {cast_sort_tmp_indices}, {{"dst_type", static_cast(dst_dtype_tmp_indices_int32)}}); @@ -332,7 +338,7 @@ class BeamSearchFunctor { // Step 3: infer selected ids from tmp_indices and ids // if pre_ids == end_ids, use pre_ids rather than ids - Tensor cal_ids(ids_int32.type()); + Tensor cal_ids(ids_int32.dtype()); cal_ids.mutable_data(ids_int32.dims(), place); const auto& runner_select_equal_end_id = NpuOpRunner( "Select", {equal_end_ids, expand_pre_ids, ids_int32}, {cal_ids}, {}); @@ -347,7 +353,7 @@ class BeamSearchFunctor { // construct batch_ids like [[0, 0, 0], [1, 1, 1], ..., [bs-1, bs-1, bs-1]] // construct arange(num_seqs*beam_size).reshape((num_seqs, beam_size)) // // beam_size - Tensor batch_ids(framework::proto::VarType::INT32); + Tensor batch_ids(experimental::DataType::INT32); batch_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 1})); batch_ids.mutable_data(place); @@ -362,7 +368,7 @@ class BeamSearchFunctor { // sort topk_scores to get selected_scores // get indices of gather_nd op for calculating selected_scores - Tensor gather_nd_score_indices(framework::proto::VarType::INT32); + Tensor gather_nd_score_indices(experimental::DataType::INT32); gather_nd_score_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 2})); gather_nd_score_indices.mutable_data(place); @@ -388,7 +394,7 @@ class BeamSearchFunctor { // get indices of gather_nd op cast_sort_tmp_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 1})); - Tensor gather_nd_id_indices(framework::proto::VarType::INT32); + Tensor gather_nd_id_indices(experimental::DataType::INT32); gather_nd_id_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 2})); gather_nd_id_indices.mutable_data(place); @@ -403,7 +409,7 @@ class BeamSearchFunctor { runner_concat_id_indices.Run(stream); // use gather_nd to get selected_ids - Tensor topk_ids(framework::proto::VarType::INT32); + Tensor topk_ids(experimental::DataType::INT32); topk_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); topk_ids.mutable_data(ctx.GetPlace()); @@ -413,7 +419,8 @@ class BeamSearchFunctor { runner_gather_nd_ids.Run(stream); // cast topk_ids from int to int64 to get selected_ids - auto dst_dtype_selected_ids = ConvertToNpuDtype(selected_ids->type()); + auto dst_dtype_selected_ids = + ConvertToNpuDtype(framework::TransToProtoVarType(selected_ids->type())); const auto& runner_cast_selected_ids = NpuOpRunner("Cast", {topk_ids}, {*selected_ids}, {{"dst_type", static_cast(dst_dtype_selected_ids)}}); @@ -423,13 +430,13 @@ class BeamSearchFunctor { // Step 4: set lod of output Tensor // define Tensor with value `seq_width` - Tensor seq_width_tensor(framework::proto::VarType::INT32); + Tensor seq_width_tensor(experimental::DataType::INT32); seq_width_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&seq_width_tensor, static_cast(seq_width)); // beam_ids = tmp_indices // seq_width - Tensor beam_ids(framework::proto::VarType::INT32); + Tensor beam_ids(experimental::DataType::INT32); beam_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); beam_ids.mutable_data(ctx.GetPlace()); @@ -447,17 +454,18 @@ class BeamSearchFunctor { framework::make_ddim({num_seqs, static_cast(beam_size)})); // cast batch_ids from int to float32 - Tensor cast_batch_ids(framework::proto::VarType::FP32); + Tensor 
cast_batch_ids(experimental::DataType::FLOAT32); cast_batch_ids.Resize(batch_ids.dims()); cast_batch_ids.mutable_data(ctx.GetPlace()); - auto dst_dtype1 = ConvertToNpuDtype(cast_batch_ids.type()); + auto dst_dtype1 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_batch_ids.type())); const auto& runner_cast_batch_ids = NpuOpRunner("Cast", {batch_ids}, {cast_batch_ids}, {{"dst_type", static_cast(dst_dtype1)}}); runner_cast_batch_ids.Run(stream); // scale batch_ids with beam_size - Tensor scale_batch_ids(framework::proto::VarType::FP32); + Tensor scale_batch_ids(experimental::DataType::FLOAT32); scale_batch_ids.Resize(batch_ids.dims()); scale_batch_ids.mutable_data(place); const auto& runner_power = @@ -468,17 +476,18 @@ class BeamSearchFunctor { runner_power.Run(stream); // cast cast_scale_batch_ids from float32 to int - Tensor cast_scale_batch_ids(framework::proto::VarType::INT32); + Tensor cast_scale_batch_ids(experimental::DataType::INT32); cast_scale_batch_ids.Resize(scale_batch_ids.dims()); cast_scale_batch_ids.mutable_data(ctx.GetPlace()); - auto dst_dtype2 = ConvertToNpuDtype(cast_scale_batch_ids.type()); + auto dst_dtype2 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_scale_batch_ids.type())); const auto& runner_cast_scale_batch_ids = NpuOpRunner("Cast", {scale_batch_ids}, {cast_scale_batch_ids}, {{"dst_type", static_cast(dst_dtype2)}}); runner_cast_scale_batch_ids.Run(stream); // calculate parent_idx - Tensor tmp_parent_idx(framework::proto::VarType::INT32); + Tensor tmp_parent_idx(experimental::DataType::INT32); tmp_parent_idx.Resize(parent_idx->dims()); tmp_parent_idx.mutable_data(place); const auto& runner_add_beam_id = NpuOpRunner( @@ -486,7 +495,8 @@ class BeamSearchFunctor { runner_add_beam_id.Run(stream); // cast tmp_parent_idx from int to int64 to get parent_idx - auto dst_dtype_parent_idx = ConvertToNpuDtype(parent_idx->type()); + auto dst_dtype_parent_idx = + ConvertToNpuDtype(framework::TransToProtoVarType(parent_idx->type())); const auto& runner_cast_parent_idx = NpuOpRunner("Cast", {tmp_parent_idx}, {*parent_idx}, {{"dst_type", static_cast(dst_dtype_parent_idx)}}); diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index ec78b2dd7b012..0b0584608a300 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/cross_entropy.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace platform { @@ -112,7 +113,8 @@ class CrossEntropyFunctor { } else { HardLabelCrossEntropyCPUFunctorImpl functor_impl( out, prob, labels, ignore_index, axis_dim); - framework::VisitIntDataType(labels->type(), functor_impl); + framework::VisitIntDataType( + framework::TransToProtoVarType(labels->dtype()), functor_impl); } } }; diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index a13adb63fdc54..829ac9fb55964 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" @@ -122,7 +123,8 @@ class CrossEntropyFunctor { HardLabelCrossEntropyCUDAFunctorImpl functor( loss_data, prob_data, labels->data(), batch_size, class_num, ignore_index, kMaxBlockDim, ctx.stream()); - framework::VisitDataType(labels->type(), functor); + framework::VisitDataType(framework::TransToProtoVarType(labels->dtype()), + functor); } } }; diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 2cfff0ae88ff6..9ce615c949ffc 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -108,7 +108,8 @@ struct MatrixEighFunctor { ValueType *rwork_data = nullptr; // complex type - if (framework::IsComplexType(input.type())) { + if (framework::IsComplexType( + framework::TransToProtoVarType(input.dtype()))) { lrwork = std::max(1, static_cast(rwork_opt)); rwork_data = rwork_tensor.mutable_data( framework::make_ddim({lrwork}), ctx.GetPlace()); @@ -180,7 +181,8 @@ struct MatrixEighFunctor { // When the input type is float32, and the feature value input dimension is // greater than or equal to [*,32,32] and less than or equal to // [*,512,512], Syevj has better performance. - bool use_syevj = (input.type() == framework::proto::VarType::FP32 && + bool use_syevj = (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::FP32 && values_stride >= 32 && values_stride <= 512); syevjInfo_t syevj_params; if (use_syevj) { diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc index ee6610eae1469..f23c3f14c5c9a 100644 --- a/paddle/fluid/operators/math/matrix_solve.cu.cc +++ b/paddle/fluid/operators/math/matrix_solve.cu.cc @@ -65,7 +65,7 @@ class MatrixSolveFunctor { // copy input A to a temporary tensor tmp_a, // LU factorization, written back to original matrix A, so in the beginning, // it's necessary to create a temporary tensor tmp_a. 
- Tensor tmp_a(a.type()); + Tensor tmp_a(a.dtype()); tmp_a.Resize(a.dims()); tmp_a.mutable_data(context.GetPlace()); framework::TensorCopy(a, context.GetPlace(), &tmp_a); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 6b24f4778442b..9f0f8a96c7677 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -765,8 +765,9 @@ class MatMulOp : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/matmul_op_npu.cc b/paddle/fluid/operators/matmul_op_npu.cc index 1f2626b5f1ea3..f97921b561f96 100644 --- a/paddle/fluid/operators/matmul_op_npu.cc +++ b/paddle/fluid/operators/matmul_op_npu.cc @@ -32,7 +32,7 @@ static void Mul(const framework::ExecutionContext& ctx, const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {*Out}, {}); runner_dx.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {Out_temp}, {}); runner_dx.Run(stream); @@ -53,7 +53,7 @@ static void Dot(const framework::ExecutionContext& ctx, const auto& runner = NpuOpRunner("Dot", {X, Y}, {*Out}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("Dot", {X, Y}, {Out_temp}); out_temp_runner.Run(stream); @@ -77,7 +77,7 @@ static void MatMul2D(const framework::ExecutionContext& ctx, {{"transpose_x1", trans_x}, {"transpose_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("MatMul", {X, Y}, {Out_temp}, @@ -103,7 +103,7 @@ static void MatMulND(const framework::ExecutionContext& ctx, {{"adj_x1", trans_x}, {"adj_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("BatchMatMul", {X, Y}, {Out_temp}, @@ -235,7 +235,7 @@ class MatMulNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + Tensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(framework::make_ddim(x_broadcast_dims)); @@ -250,7 +250,7 @@ class MatMulNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + Tensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(framework::make_ddim(y_broadcast_dims)); @@ -293,7 +293,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { // Case 1: [K] x [K] = [1] if (x_ndim == 1 && y_ndim == 1) { - Tensor dout_temp(dOut->type()); + Tensor dout_temp(dOut->dtype()); 
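Editor's note: the matmul / matmul_v2 hunks around this point illustrate the split the migration settles on: OpKernelType stays on the legacy proto enum, so the complex-promotion path converts with TransToProtoVarType, while NPU temporaries are now built straight from the pten dtype(). A condensed sketch of the kernel-type side, assuming the constructor used in the hunks; MakeComplexPromotedKernelType is an illustrative name:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

framework::OpKernelType MakeComplexPromotedKernelType(
    const framework::Tensor& tensor) {
  // OpKernelType still stores proto::VarType::Type, so convert the pten
  // dtype before promoting the kernel type for complex inputs.
  return framework::OpKernelType(
      framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
      tensor.layout());
}

}  // namespace operators
}  // namespace paddle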
dout_temp.Resize(X->dims()); dout_temp.mutable_data(ctx.GetPlace()); NpuOpRunner runner; @@ -395,7 +395,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + Tensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(framework::make_ddim(x_broadcast_dims)); @@ -410,7 +410,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + Tensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(framework::make_ddim(y_broadcast_dims)); @@ -435,7 +435,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { !transpose_y, alpha); } } else { - Tensor dx_temp(X->type()); + Tensor dx_temp(X->dtype()); dx_temp.Resize(framework::make_ddim(x_broadcast_dims)); if (transpose_x) { MatMulND(ctx, stream, y_temp_brd, dout_temp, &dx_temp, transpose_y, @@ -457,7 +457,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { false, alpha); } } else { - Tensor dy_temp(Y->type()); + Tensor dy_temp(Y->dtype()); dy_temp.Resize(framework::make_ddim(y_broadcast_dims)); if (transpose_y) { MatMulND(ctx, stream, dout_temp, x_temp_brd, &dy_temp, true, diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index f7a5e2a8af409..eaec4b78f4fc0 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -272,8 +272,9 @@ class MatMulV2Op : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -367,8 +368,9 @@ class MatMulV2OpGrad : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/matrix_power_op.h b/paddle/fluid/operators/matrix_power_op.h index 93755b22bf93a..58a8ef87628fe 100644 --- a/paddle/fluid/operators/matrix_power_op.h +++ b/paddle/fluid/operators/matrix_power_op.h @@ -108,7 +108,7 @@ void MatrixPowerFunction(const Tensor* X, const int n, Tensor* Out, // Calculate Out = newX^{n} for abs(n) > 4 with time complexity as O(logN) int bit = 0; - Tensor z = Tensor(X->type()); + Tensor z = Tensor(X->dtype()); bool out_inited = false; Tensor temp_out = ctx.AllocateTmpTensor(X->dims(), dev_ctx); Tensor temp_z = ctx.AllocateTmpTensor(X->dims(), dev_ctx); diff --git a/paddle/fluid/operators/mean_op_mlu.cc b/paddle/fluid/operators/mean_op_mlu.cc index ca4f3dcc3f465..c25b25aa50ea4 100644 --- 
a/paddle/fluid/operators/mean_op_mlu.cc +++ b/paddle/fluid/operators/mean_op_mlu.cc @@ -45,10 +45,12 @@ class MeanMLUKernel : public framework::OpKernel { reduce_dims.push_back(i); } - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnlReduceDesc reduction_desc( reduce_dims, CNNL_REDUCE_AVG, ToCnnlDataType(), @@ -88,18 +90,21 @@ class MeanMLUGradKernel : public framework::OpKernel { } // means - Tensor mean_var(output_grad->type()); + Tensor mean_var(framework::TransToProtoVarType(output_grad->dtype())); mean_var.mutable_data(input_grad->dims(), context.GetPlace()); - MLUCnnlTensorDesc mean_var_desc(mean_var, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(mean_var.type())); + MLUCnnlTensorDesc mean_var_desc( + mean_var, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(mean_var.dtype()))); auto value = static_cast(1.0 / static_cast(input_grad->numel())); MLUCnnl::Fill(context, value, mean_var_desc.get(), GetBasePtr(&mean_var)); // means mul output_grad - MLUCnnlTensorDesc in_desc(*output_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output_grad->type())); - MLUCnnlTensorDesc out_desc(*input_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input_grad->type())); + MLUCnnlTensorDesc in_desc( + *output_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output_grad->dtype()))); + MLUCnnlTensorDesc out_desc( + *input_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input_grad->dtype()))); MLUCnnlOpTensorDesc op_tensor_desc(CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); diff --git a/paddle/fluid/operators/mean_op_npu.cc b/paddle/fluid/operators/mean_op_npu.cc index d83ed37aa7b5d..d81594658044a 100644 --- a/paddle/fluid/operators/mean_op_npu.cc +++ b/paddle/fluid/operators/mean_op_npu.cc @@ -59,20 +59,20 @@ class MeanGradNPUKernel : public framework::OpKernel { IG->mutable_data(context.GetPlace()); // ones - Tensor ones(grad->type()); + Tensor ones(grad->dtype()); ones.mutable_data(IG->dims(), context.GetPlace()); const auto& runner_ones = NpuOpRunner("OnesLike", {*IG}, {ones}, {}); runner_ones.Run(stream); // means - Tensor mean_tensor(grad->type()); + Tensor mean_tensor(grad->dtype()); mean_tensor.Resize({1}); mean_tensor.mutable_data(context.GetPlace()); FillNpuTensorWithConstant( &mean_tensor, static_cast(1.0 / static_cast(IG->numel()))); // means mul ones - Tensor mean_ma(grad->type()); + Tensor mean_ma(grad->dtype()); mean_ma.Resize(IG->dims()); mean_ma.mutable_data(context.GetPlace()); const auto& runner_mul_1 = diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h index 6d1b8eee04306..0a3921c9f6759 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.h +++ b/paddle/fluid/operators/memcpy_h2d_op.h @@ -46,7 +46,7 @@ class MemcpyH2DFunctor { auto stream = nullptr; #endif out_tensor.mutable_data( - dev_ctx_.GetPlace(), lod_tensor.type(), + dev_ctx_.GetPlace(), lod_tensor.dtype(), pten::Stream(reinterpret_cast(stream))); if (dst_place_type_ == 0 || dst_place_type_ == 1) { diff --git a/paddle/fluid/operators/meshgrid_op.cc b/paddle/fluid/operators/meshgrid_op.cc index 54600e26bb57f..1872547d2ae56 100644 --- 
a/paddle/fluid/operators/meshgrid_op.cc +++ b/paddle/fluid/operators/meshgrid_op.cc @@ -59,7 +59,7 @@ class MeshgridOp : public framework::OperatorWithKernel { bool flag = 0; for (auto* input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/meshgrid_op_npu.cc b/paddle/fluid/operators/meshgrid_op_npu.cc index 6b16d919a8904..dab694b03b427 100644 --- a/paddle/fluid/operators/meshgrid_op_npu.cc +++ b/paddle/fluid/operators/meshgrid_op_npu.cc @@ -54,7 +54,7 @@ class MeshgridNPUKernel : public framework::OpKernel { view_shape[i] = shape[i]; framework::DDim out_dims_reshape = framework::make_ddim(view_shape); - framework::Tensor reshape_ins_tensor(ins[i]->type()); + framework::Tensor reshape_ins_tensor(ins[i]->dtype()); reshape_ins_tensor.ShareDataWith(*ins[i]); reshape_ins_tensor.Resize(out_dims_reshape); diff --git a/paddle/fluid/operators/metrics/accuracy_op_npu.cc b/paddle/fluid/operators/metrics/accuracy_op_npu.cc index 4b81296dd1a93..e279dc5815338 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_npu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_npu.cc @@ -40,11 +40,12 @@ class AccuracyNPUKernel : public framework::OpKernel { } // cast `indices` or `label` if their type is not consistent - Tensor cast_indices(framework::proto::VarType::INT32); - Tensor cast_label(framework::proto::VarType::INT32); - if (indices->type() != label->type()) { + Tensor cast_indices(experimental::DataType::INT32); + Tensor cast_label(experimental::DataType::INT32); + if (indices->dtype() != label->dtype()) { auto dst_dtype = ConvertToNpuDtype(framework::proto::VarType::INT32); - if (indices->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(indices->dtype()) != + framework::proto::VarType::INT32) { cast_indices.Resize(indices->dims()); cast_indices.mutable_data(ctx.GetPlace()); const auto& runner_cast_indices = @@ -54,7 +55,8 @@ class AccuracyNPUKernel : public framework::OpKernel { } else { cast_indices.ShareDataWith(*indices); } - if (label->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(label->dtype()) != + framework::proto::VarType::INT32) { cast_label.Resize(label->dims()); cast_label.mutable_data(ctx.GetPlace()); const auto& runner_cast_label = @@ -70,7 +72,7 @@ class AccuracyNPUKernel : public framework::OpKernel { } // equal - Tensor tmp_equal(framework::proto::VarType::BOOL); + Tensor tmp_equal(experimental::DataType::BOOL); tmp_equal.Resize(inference->dims()); tmp_equal.mutable_data(ctx.GetPlace()); const auto& runner_equal = @@ -78,18 +80,19 @@ class AccuracyNPUKernel : public framework::OpKernel { runner_equal.Run(stream); // cast equal - Tensor tmp_equal_cast(framework::proto::VarType::FP32); + Tensor tmp_equal_cast(experimental::DataType::FLOAT32); tmp_equal_cast.Resize(inference->dims()); tmp_equal_cast.mutable_data(ctx.GetPlace()); const auto& runner_cast_equal = NpuOpRunner( "Cast", {tmp_equal}, {tmp_equal_cast}, {{"dst_type", - static_cast(ConvertToNpuDtype(tmp_equal_cast.type()))}}); + static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(tmp_equal_cast.dtype())))}}); runner_cast_equal.Run(stream); // [correct] // reduce_max - Tensor tmp_correct_max(framework::proto::VarType::FP32); + Tensor tmp_correct_max(experimental::DataType::FLOAT32); tmp_correct_max.Resize(framework::make_ddim({num_samples})); 
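Editor's note: the accuracy_op_npu.cc hunks above show the two comparison styles the migration leaves behind: two tensors can be compared by their pten dtype() directly, but a comparison against a proto::VarType constant still goes through the explicit conversion. A small sketch of that distinction, grounded in the checks above; NeedsInt32Cast is an illustrative name:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

bool NeedsInt32Cast(const framework::Tensor& indices,
                    const framework::Tensor& label) {
  // dtype-to-dtype comparison needs no conversion ...
  if (indices.dtype() == label.dtype()) return false;
  // ... but comparing against the legacy proto enum still does.
  return framework::TransToProtoVarType(indices.dtype()) !=
         framework::proto::VarType::INT32;
}

}  // namespace operators
}  // namespace paddle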
tmp_correct_max.mutable_data(ctx.GetPlace()); const auto& runner_reduce_max = @@ -98,7 +101,7 @@ class AccuracyNPUKernel : public framework::OpKernel { runner_reduce_max.Run(stream); // reduce_sum - Tensor tmp_correct(framework::proto::VarType::FP32); + Tensor tmp_correct(experimental::DataType::FLOAT32); tmp_correct.Resize(correct->dims()); tmp_correct.mutable_data(ctx.GetPlace()); const auto& runner_reduce_sum = @@ -110,7 +113,8 @@ class AccuracyNPUKernel : public framework::OpKernel { correct->mutable_data(ctx.GetPlace()); const auto& runner_cast_correct = NpuOpRunner( "Cast", {tmp_correct}, {*correct}, - {{"dst_type", static_cast(ConvertToNpuDtype(correct->type()))}}); + {{"dst_type", static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(correct->dtype())))}}); runner_cast_correct.Run(stream); // [total] @@ -118,7 +122,7 @@ class AccuracyNPUKernel : public framework::OpKernel { FillNpuTensorWithConstant(total, static_cast(num_samples)); // use `total` of type `float32` for calculating accuracy - Tensor tmp_total(framework::proto::VarType::FP32); + Tensor tmp_total(experimental::DataType::FLOAT32); tmp_total.Resize(total->dims()); tmp_total.mutable_data(ctx.GetPlace()); FillNpuTensorWithConstant(&tmp_total, diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index cd92aaf0a108c..b4f801f11f99d 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -61,7 +61,8 @@ class ConcatMKLDNNHandler concat_axis = concat_axis + rank; } - memory::data_type dt = framework::ToMKLDNNDataType(inputs[0]->type()); + memory::data_type dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(inputs[0]->dtype())); std::vector srcs_md; srcs_md.reserve(inputs.size()); @@ -185,10 +186,11 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector offset(dout_vec_dims.size(), 0); - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(), - dout_type, onednn_engine); + dnnl::memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + dout_vec_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index eef38bf99b136..c5215751c8325 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -48,14 +48,16 @@ static dnnl::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { - auto residual_dt = framework::ToMKLDNNDataType(residual_param->type()); + auto residual_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { - dst_dt = framework::ToMKLDNNDataType(residual_param->type()); + dst_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())); } } } @@ -202,7 +204,8 @@ class ConvMKLDNNHandlerT 
dnnl::memory::desc src_md, weights_md; if (platform::is_int8()) { src_md = platform::MKLDNNMemDesc( - src_tz, framework::ToMKLDNNDataType(input->type()), + src_tz, framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())), chosen_memory_format); weights_md = platform::MKLDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); @@ -669,7 +672,8 @@ class ConvMKLDNNHandlerT std::shared_ptr AcquireResidualMemory( const framework::Tensor* residual_param) { void* residual_data = - residual_param->type() == framework::DataTypeTrait::DataType() + framework::TransToProtoVarType(residual_param->dtype()) == + framework::DataTypeTrait::DataType() ? platform::to_void_cast(residual_param->data()) : platform::to_void_cast(residual_param->data()); auto residual_mem_p = this->AcquireMemory("@user_residual_data_mem_p"); @@ -679,7 +683,8 @@ class ConvMKLDNNHandlerT } else { auto user_residual_md = platform::MKLDNNMemDesc( framework::vectorize(residual_param->dims()), - framework::ToMKLDNNDataType(residual_param->type()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())), residual_param->format()); return this->AcquireMemoryFromPrimitive(user_residual_md, residual_data, @@ -951,8 +956,8 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { // For convolution with groups convert from blocked to NCHW // otherwise there will be problems in next operators working on this data if (g > 1) { - dnnl::memory::data_type in_type = - framework::ToMKLDNNDataType(filter->type()); + dnnl::memory::data_type in_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(filter->dtype())); // for 3d conv with groups (six dimensional data reorder to goidhw) // for 2d conv with groups (five dimensional data reorder to goihw) // auto weights_tz = framework::vectorize(filter->dims()); @@ -961,8 +966,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { dnnl::memory::format_tag out_format = weights_tz.size() == 6 ? dnnl::memory::format_tag::goidhw : dnnl::memory::format_tag::goihw; - platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(), - in_type, mkldnn_engine); + platform::ReorderMKLDNNHandler handler( + weights_tz, framework::TransToProtoVarType(filter->dtype()), + in_type, mkldnn_engine); auto reorder_dst_memory_p = handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace()); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index d4d78bdfb6613..e5ac28d4a5043 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -64,8 +64,8 @@ class DeQuantOpKernel : public framework::OpKernel { auto src_tz = paddle::framework::vectorize(input->dims()); auto dst_tz = paddle::framework::vectorize(output->dims()); - dnnl::memory::data_type src_dt = - paddle::framework::ToMKLDNNDataType(input->type()); + dnnl::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); MKLDNNMemoryFormat src_fmt = input->format(); std::string key = diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index b1be0f0f8fb44..6c9d431d1c9fd 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" @@ -75,7 +76,7 @@ class ExpandMKLDNNKernel : public paddle::framework::OpKernel { private: dnnl::memory::format_tag GetExtendedFormatTag( - std::vector& dims, int new_size, + std::vector& dims, int new_size, // NOLINT dnnl::memory::format_tag format_tag) const { dnnl::memory::desc md(dims, paddle::platform::MKLDNNGetDataType(), format_tag); @@ -112,10 +113,11 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { auto& astream = MKLDNNDeviceContext::tls().get_stream(); if (dout_vec_dims == dx_vec_dims) { - dnnl::memory::data_type dout_type = - paddle::framework::ToMKLDNNDataType(dout->type()); + dnnl::memory::data_type dout_type = paddle::framework::ToMKLDNNDataType( + paddle::framework::TransToProtoVarType(dout->dtype())); paddle::platform::ReorderMKLDNNHandler reorder_handler( - dout_vec_dims, dout->type(), dout_type, onednn_engine); + dout_vec_dims, paddle::framework::TransToProtoVarType(dout->dtype()), + dout_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), paddle::platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index d7353d2def7fc..8efd2b226cad6 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h" #include +#include "paddle/fluid/framework/convert_utils.h" using dnnl::memory; using dnnl::primitive; @@ -56,10 +57,11 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx, auto output_dims = vectorize(output.dims()); - memory::data_type input_type = - paddle::framework::ToMKLDNNDataType(input->type()); + memory::data_type input_type = paddle::framework::ToMKLDNNDataType( + paddle::framework::TransToProtoVarType(input->dtype())); paddle::platform::ReorderMKLDNNHandler reorder_handler( - output_dims, input->type(), input_type, dev_ctx.GetEngine()); + output_dims, paddle::framework::TransToProtoVarType(input->dtype()), + input_type, dev_ctx.GetEngine()); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( memory::format_tag::abc, diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 1b9d9b8f31d35..bc2dbf5696813 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -310,9 +310,10 @@ std::shared_ptr> GetPrimitiveFactory( const Tensor *input_x, const Tensor *input_y, const dnnl::engine &mkldnn_engine) { std::string key = platform::CreateKey( - dev_ctx, input_x->type(), framework::vectorize(input_x->dims()), - input_y->type(), framework::vectorize(input_y->dims()), - ctx.OutputName("Out")); + dev_ctx, framework::TransToProtoVarType(input_x->dtype()), + framework::vectorize(input_x->dims()), + framework::TransToProtoVarType(input_y->dtype()), + framework::vectorize(input_y->dims()), ctx.OutputName("Out")); key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); auto prim_creator = std::static_pointer_cast>( diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index eb0240e3e3d46..f50d183521c36 
100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -94,7 +94,8 @@ class PoolingMKLDNNHandler const auto is_test = ctx.Attr("is_test"); - const auto dt = framework::ToMKLDNNDataType(input->type()); + const auto dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); const auto exclude_padding = ctx.Attr("exclusive"); @@ -183,7 +184,8 @@ class PoolingMKLDNNHandler auto diff_src_tz = paddle::framework::vectorize(in_x_grad->dims()); auto diff_dst_tz = paddle::framework::vectorize(out_grad->dims()); - const auto dt = framework::ToMKLDNNDataType(in_x->type()); + const auto dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(in_x->dtype())); auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format()); auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); auto diff_dst_md = dnnl::memory::desc( diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 8d14de6f7c969..1b1bd69aec2f4 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -52,7 +52,8 @@ class ReQuantOpKernel : public framework::OpKernel { "Scale of output cannot be 0.0")); if (shift_in != 0.0f) { PADDLE_ENFORCE_EQ( - input->type(), framework::proto::VarType::UINT8, + framework::TransToProtoVarType(input->dtype()), + framework::proto::VarType::UINT8, platform::errors::Unimplemented("Requantize does not support nonzero " "shift for signed input.")); } @@ -81,7 +82,8 @@ class ReQuantOpKernel : public framework::OpKernel { if (reorder_p == nullptr) { auto dst_tz = framework::vectorize(output->dims()); - auto src_dt = framework::ToMKLDNNDataType(input->type()); + auto src_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); auto dst_dt = with_shift ? 
framework::MKLDNNDataType::u8 : src_dt; auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, input->format()); diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index 06142e95532c5..744fd34062a36 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -74,9 +74,11 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { auto x_vec_dims = framework::vectorize(x_dims); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); @@ -99,8 +101,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferInOutShape(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT switch (op_name) { case ReshapeKernelOpName::reshape: InferShapeReshapeOp(ctx, x_dims, out_dims); @@ -127,8 +129,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeReshapeOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); x_dims = x->dims(); @@ -137,8 +139,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeReshape2Op(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* out = ctx.Output("Out"); auto* xshape = ctx.Output("XShape"); auto xshape_dims = xshape->dims(); @@ -149,9 +151,10 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { // in reshape1/2 ops "ShapeTensor" has highest priority and "Shape" has // second highest priority - void ChangeReshapeOutDimsIfNeeded(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + void ChangeReshapeOutDimsIfNeeded( + const framework::ExecutionContext& ctx, + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto list_new_shape_tensor = ctx.MultiInput("ShapeTensor"); if (list_new_shape_tensor.size() > 0) { auto new_shape = extract_shape(list_new_shape_tensor); @@ -167,8 +170,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeSqueezeOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* x = ctx.Input("X"); x_dims = x->dims(); const auto& axes = ctx.Attr>("axes"); @@ -176,8 +179,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeSqueeze2Op(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* out = ctx.Output("Out"); auto* xshape = 
ctx.Output("XShape"); auto xshape_dims = xshape->dims(); @@ -186,8 +189,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeFlattenOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto x = ctx.Input("X"); x_dims = x->dims(); auto axes = ctx.Attr("axis"); @@ -324,10 +327,11 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { auto dout_vec_dims = framework::vectorize(dout->dims()); - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(), - dout_type, onednn_engine); + dnnl::memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + dout_vec_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), platform::to_void_cast(dout->data())); @@ -347,7 +351,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { } void InferOutputShapeInGrad(const framework::ExecutionContext& ctx, - framework::DDim& x_dims) const { + framework::DDim& x_dims) const { // NOLINT switch (op_name) { case ReshapeKernelOpName::reshape: InferShapeReshapeSqueezeGradOp(ctx, x_dims); @@ -373,20 +377,22 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { } } - void InferShapeReshapeSqueezeGradOp(const framework::ExecutionContext& ctx, - framework::DDim& dx_dims) const { + void InferShapeReshapeSqueezeGradOp( + const framework::ExecutionContext& ctx, + framework::DDim& dx_dims) const { // NOLINT auto* dx = ctx.Output(framework::GradVarName("X")); dx_dims = dx->dims(); } void InferShapeReshape2Squeeze2Flatten2GradOp( - const framework::ExecutionContext& ctx, framework::DDim& dx_dims) const { + const framework::ExecutionContext& ctx, + framework::DDim& dx_dims) const { // NOLINT auto xshape_dims = ctx.Input("XShape")->dims(); dx_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size()); } void InferShapeFlattenGradOp(const framework::ExecutionContext& ctx, - framework::DDim& dx_dims) const { + framework::DDim& dx_dims) const { // NOLINT dx_dims = ctx.Input("X")->dims(); } }; diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc index e5f70fa10e375..d0059571cdc9b 100644 --- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc @@ -97,10 +97,12 @@ class SliceMKLDNNKernel : public framework::OpKernel { out->Resize(framework::make_ddim(slice_dims)); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); @@ -192,15 +194,16 @@ class SliceGradMKLDNNKernel : public framework::OpKernel { slice_dims[axes[i]] = ends[i] - starts[i]; } - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); + dnnl::memory::data_type dout_type = 
framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); dnnl::memory::desc md(dout_vec_dims, platform::MKLDNNGetDataType(), dout->format()); dnnl::memory::format_tag reorder_format_tag = platform::GetMKLDNNFormat(md.reshape(slice_dims)); - platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(), - dout_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + slice_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( reorder_format_tag, platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc index a8e1e6e8a0e4c..f87b75f57d4d0 100644 --- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc @@ -90,14 +90,16 @@ class SplitMKLDNNKernel : public framework::OpKernel { auto x_vec_dims = framework::vectorize(x_dims); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); std::vector offset(x_vec_dims.size(), 0); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); diff --git a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc index ca3aab9afc05b..ad9f8b3aa79ce 100644 --- a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc @@ -46,7 +46,8 @@ class StackMKLDNNHandler // in stack op all inputs must have same dims auto input_dims = framework::vectorize(inputs[0]->dims()); - memory::data_type dt = framework::ToMKLDNNDataType(inputs[0]->type()); + memory::data_type dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(inputs[0]->dtype())); std::vector srcs_md; memory::desc dst_md; MKLDNNMemoryFormat dst_fmt; diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 0c442f2fe4d59..5df2a546812ad 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -165,7 +165,9 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { auto& in_out = in_vars[0]->Get(); auto output_tz = framework::vectorize(output->dims()); platform::ReorderMKLDNNHandler reorder_handler( - output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()), + output_tz, framework::TransToProtoVarType(output->dtype()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(in_out.dtype())), dev_ctx.GetEngine()); auto target_mem = reorder_handler.AcquireDstMemory( diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index 50071820e62c8..b1001b4e5684b 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -177,8 +177,9 @@ MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, } MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor) - : MLUCnnlTensorDesc(tensor, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(tensor.type())) {} + : 
MLUCnnlTensorDesc( + tensor, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(tensor.dtype()))) {} MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, cnnlTensorLayout_t layout, diff --git a/paddle/fluid/operators/mul_op_npu.cc b/paddle/fluid/operators/mul_op_npu.cc index 97b1ef0010f98..5a9696ec8ccd1 100644 --- a/paddle/fluid/operators/mul_op_npu.cc +++ b/paddle/fluid/operators/mul_op_npu.cc @@ -68,8 +68,10 @@ class MulNPUKernel : public framework::OpKernel { platform::errors::InvalidArgument( "now only support x_num_col_dims == 2: but got %d", x_num_col_dims)); - if (x->type() == framework::proto::VarType::FP16 && - y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(y->dtype()) == + framework::proto::VarType::FP16) { // NOTE: When the dim of the input and output shapes is inconsistent, // (Boradcast) BatchMatMul NPU OP only support FP16. out->mutable_data(ctx.GetPlace()); @@ -187,8 +189,10 @@ class MulGradNPUKernel : public framework::OpKernel { if (dx) { // tmp_dout * y [2, 3, 5] * [4,5] => [2, 3, 4] - if (dout->type() == framework::proto::VarType::FP16 && - y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(dout->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(y->dtype()) == + framework::proto::VarType::FP16) { // NOTE: When the dim of the input and output shapes is inconsistent, // (Boradcast) BatchMatMul NPU OP only support FP16. dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 57b6c92d2836c..24b506ebf8a06 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -43,7 +43,8 @@ class OneHotNPUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); float on_value = 1.0f, off_value = 0.0f; - if (in->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(in->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner runner; runner.SetType("OneHot") .AddInput(*in) diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index 34568c3c19e66..acf6baf50b418 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -42,7 +42,8 @@ class OneHotV2NPUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); float on_value = 1.0f, off_value = 0.0f; - if (in->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(in->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner runner; runner.SetType("OneHot") .AddInput(*in) diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index c1846f148fd92..56c5d48b9f497 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -122,9 +122,9 @@ class AdamNPUKernel : public framework::OpKernel { const Tensor* beta2_tensor = nullptr; const Tensor* epsilon_tensor = nullptr; - Tensor beta1_tmp(framework::proto::VarType::FP32); - Tensor beta2_tmp(framework::proto::VarType::FP32); - Tensor epsilon_tmp(framework::proto::VarType::FP32); + Tensor beta1_tmp(experimental::DataType::FLOAT32); + Tensor beta2_tmp(experimental::DataType::FLOAT32); + Tensor epsilon_tmp(experimental::DataType::FLOAT32); if 
(ctx.HasInput("Beta1Tensor")) { beta1_tensor = ctx.Input("Beta1Tensor"); @@ -255,9 +255,9 @@ class AdamWNPUKernel : public AdamNPUKernel { ctx.template device_context() .stream(); - Tensor one(framework::proto::VarType::FP32); - Tensor decay(framework::proto::VarType::FP32); - Tensor tmp(framework::proto::VarType::FP32); + Tensor one(experimental::DataType::FLOAT32); + Tensor decay(experimental::DataType::FLOAT32); + Tensor tmp(experimental::DataType::FLOAT32); tmp.mutable_data({1}, place); one.mutable_data({1}, place); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc index 4ee761cd6fad3..12aa56ebb5c7c 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc @@ -50,15 +50,15 @@ class RMSPROPNPUKernel : public framework::OpKernel { const Tensor *rho_tensor = nullptr; const Tensor *momentum_tensor = nullptr; const Tensor *epsilon_tensor = nullptr; - Tensor rho_tmp(framework::proto::VarType::FP32); + Tensor rho_tmp(experimental::DataType::FLOAT32); rho_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&rho_tmp, rho); rho_tensor = &rho_tmp; - Tensor momentum_tmp(framework::proto::VarType::FP32); + Tensor momentum_tmp(experimental::DataType::FLOAT32); momentum_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&momentum_tmp, momentum); momentum_tensor = &momentum_tmp; - Tensor epsilon_tmp(framework::proto::VarType::FP32); + Tensor epsilon_tmp(experimental::DataType::FLOAT32); epsilon_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&epsilon_tmp, epsilon); epsilon_tensor = &epsilon_tmp; diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index 08c40e02b1702..c3cf7da189d55 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -97,8 +97,9 @@ class SGDOp : public framework::OperatorWithKernel { const std::string &var_name, const framework::Tensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (var_name == "LearningRate") { - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index 23e37ea27b54f..a35fd36a295df 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -17,6 +17,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" @@ -286,7 +287,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { const bool multi_precision = ctx.Attr("multi_precision"); bool use_nesterov = ctx.Attr("use_nesterov"); auto index = ctx.Input("Index"); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (multi_precision) { if (use_nesterov) { auto update_method = UseNesterov(); @@ -354,7 +355,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { Tensor cpu_axis; const Tensor* axis_tensor = ctx.Input("Axis"); 
framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto& axis_type = axis_tensor->type(); + const auto& axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index bc6545b276ae7..fedadc7581e71 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -88,7 +88,7 @@ class PartialConcatOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index fb60ab54b77b2..242ee2f9cc97a 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -90,7 +90,7 @@ class PartialSumOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc index b7049019bc4be..a4d855f00bd61 100644 --- a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc +++ b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc @@ -96,11 +96,12 @@ void InitTensorsOnClient(framework::Scope* scope, int64_t rows_numel, std::vector temp_vec{0}; float* temp_ptr = temp_vec.data(); - memory::Copy( - place, reinterpret_cast(micro_id_ptr), platform::CPUPlace(), - reinterpret_cast(temp_ptr), - micro_id_var->numel() * framework::SizeOfType(micro_id_var->type()), - stream); + memory::Copy(place, reinterpret_cast(micro_id_ptr), + platform::CPUPlace(), reinterpret_cast(temp_ptr), + micro_id_var->numel() * + framework::SizeOfType( + framework::TransToProtoVarType(micro_id_var->dtype())), + stream); auto x_var = scope->Var("x")->GetMutable(); float* x_ptr = @@ -110,7 +111,8 @@ void InitTensorsOnClient(framework::Scope* scope, int64_t rows_numel, float* x_vec_ptr = x_vec.data(); memory::Copy(place, reinterpret_cast(x_ptr), platform::CPUPlace(), reinterpret_cast(x_vec_ptr), - x_var->numel() * framework::SizeOfType(x_var->type()), stream); + x_var->numel() * framework::DataTypeSize(x_var->dtype()), + stream); // auto res_var = scope->Var("res")->GetMutable(); // float* res_ptr = diff --git a/paddle/fluid/operators/put_along_axis_op.cu b/paddle/fluid/operators/put_along_axis_op.cu index 800da8a275c2d..738ced5afb085 100644 --- a/paddle/fluid/operators/put_along_axis_op.cu +++ b/paddle/fluid/operators/put_along_axis_op.cu @@ -38,7 +38,7 @@ class PutAlongAxisCUDAKernel : public framework::OpKernel { auto result = ctx.Output("Result"); const platform::DeviceContext &device_ctx = ctx.device_context(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); framework::TensorCopy(*input, ctx.GetPlace(), result); if (reduce_op == "add") { @@ -90,7 +90,7 @@ class PutAlongAxisGradOpCUDAKernel : public framework::OpKernel { auto result_grad = ctx.Input(framework::GradVarName("Result")); 
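The hunks above repeat one mechanical rewrite: kernels used to read the fluid proto enum straight from the tensor via tensor->type(), and now read pten's experimental::DataType via tensor->dtype() and convert it back with framework::TransToProtoVarType wherever a proto::VarType::Type is still required (the oneDNN data-type mapping, ReorderMKLDNNHandler, and index-type dispatch). A minimal sketch of that call shape, assuming the Paddle source tree at this patch and the convert_utils.h header these hunks add; the helper names ToOneDNNType and IsInt32Index are illustrative only, not part of the patch:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

// Hypothetical helper: dtype() now yields experimental::DataType, so the
// proto enum is recovered explicitly before calling APIs that still take
// framework::proto::VarType::Type.
inline dnnl::memory::data_type ToOneDNNType(const framework::Tensor& t) {
  return framework::ToMKLDNNDataType(
      framework::TransToProtoVarType(t.dtype()));
}

// Same idea for the index-type checks in scatter/gather style kernels.
inline bool IsInt32Index(const framework::Tensor& index) {
  return framework::TransToProtoVarType(index.dtype()) ==
         framework::proto::VarType::INT32;
}

}  // namespace operators
}  // namespace paddle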
auto axis = ctx.Attr("Axis"); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (input_grad) { framework::TensorCopy(*result_grad, ctx.GetPlace(), input_grad); if (index_type == framework::proto::VarType::INT32) { diff --git a/paddle/fluid/operators/put_along_axis_op.h b/paddle/fluid/operators/put_along_axis_op.h index 0b4481ceacf73..c0be2584f4c3d 100644 --- a/paddle/fluid/operators/put_along_axis_op.h +++ b/paddle/fluid/operators/put_along_axis_op.h @@ -40,7 +40,7 @@ class PutAlongAxisOpKernel : public framework::OpKernel { framework::TensorCopy(*input, ctx.GetPlace(), result); const platform::DeviceContext &device_ctx = ctx.device_context(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (reduce_op == "add") { if (index_type == framework::proto::VarType::INT32) { cpu_scatter_add_kernel(*result, axis, *index, *value, @@ -90,7 +90,7 @@ class PutAlongAxisGradOpKernel : public framework::OpKernel { auto index = ctx.Input("Index"); auto result_grad = ctx.Input(framework::GradVarName("Result")); auto axis = ctx.Attr("Axis"); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (input_grad) { framework::TensorCopy(*result_grad, ctx.GetPlace(), input_grad); diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index c50a8b731d5a5..ace99db1220b5 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/search_compute.h" @@ -391,7 +392,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { if (iter != iter_end) { exit(1); } - auto weight_type = _blobs_0->type(); + auto weight_type = framework::TransToProtoVarType(_blobs_0->dtype()); if (_is_training == 0 && weight_type != framework::proto::VarType::INT8) { axpy_noadd(top_data, top_data, top->dims()[0] * top->dims()[1], _drop_out_percent); diff --git a/paddle/fluid/operators/randint_op.cu b/paddle/fluid/operators/randint_op.cu index 40e390b0b8724..be6b6c53981cc 100644 --- a/paddle/fluid/operators/randint_op.cu +++ b/paddle/fluid/operators/randint_op.cu @@ -13,6 +13,7 @@ // limitations under the License. #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/uniform_random_op.h" @@ -45,7 +46,7 @@ class GPURandintKernel : public framework::OpKernel { T high = static_cast(context.Attr("high")) - 1; framework::LoDTensor tensor; tensor.Resize(out->dims()); - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, framework::TransToPtenDataType(dtype)); T* data = tensor.mutable_data(cpu); int64_t size = out->numel(); diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 4aad78f1c49cf..6393ff2135d1d 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -13,6 +13,7 @@ // limitations under the License. 
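Two further variants of the same migration appear in the surrounding files: temporary tensors that used to be constructed from the proto enum (Tensor t(framework::proto::VarType::FP32)) now take pten's experimental::DataType::FLOAT32 directly, and byte-size arithmetic moves from framework::SizeOfType(t.type()) to framework::DataTypeSize(t.dtype()). A small sketch under the same source-tree assumption; MakeFloat32Scalar and PayloadBytes are illustrative names only:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Hypothetical helper: allocate a 1-element FP32 tensor the post-patch way,
// passing the pten DataType to the Tensor constructor.
inline framework::Tensor MakeFloat32Scalar(const platform::Place& place) {
  framework::Tensor t(experimental::DataType::FLOAT32);
  t.mutable_data<float>({1}, place);
  return t;
}

// Byte size of a tensor's payload, computed from dtype() instead of the
// removed proto-typed type() accessor.
inline size_t PayloadBytes(const framework::Tensor& t) {
  return t.numel() * framework::DataTypeSize(t.dtype());
}

}  // namespace operators
}  // namespace paddle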
#include "paddle/fluid/operators/reader/buffered_reader.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -126,8 +127,8 @@ void BufferedReader::ReadAsync(size_t i) { cuda[i].set_layout(cpu[i].layout()); cuda_pinned_ptrs[i] = cuda[i].mutable_data(cuda_pinned_place, cpu[i].type()); - auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + auto size = cpu[i].numel() * + paddle::framework::DataTypeSize(cpu[i].dtype()); memory::Copy(cuda_pinned_place, cuda_pinned_ptrs[i], cpu[i].place(), cpu[i].data(), size); @@ -175,7 +176,7 @@ void BufferedReader::ReadAsync(size_t i) { auto cpu_ptr = cpu[i].data(); auto gpu_ptr = gpu_ptrs[i]; auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); if (platform::is_cuda_pinned_place(cpu_place)) { memory::Copy(place_, gpu_ptr, cpu_place, cpu_ptr, size, stream_.get()); @@ -234,7 +235,7 @@ void BufferedReader::ReadAsync(size_t i) { auto cpu_ptr = cpu[i].data(); auto npu_ptr = npu_ptrs[i]; auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); if ((platform::is_npu_place(cpu_place))) { memory::Copy(place_, npu_ptr, cpu_place, cpu_ptr, size, stream_.get()); diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 38894495b4ca0..73bc67287c278 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/platform/profiler.h" @@ -141,7 +142,7 @@ class ReadOp : public framework::OperatorBase { "shape = [%s], but received fed shape [%s]", out_arg_names[i], shapes[i].size(), shapes[i], in_dims)); PADDLE_ENFORCE_EQ( - ins[i].type(), var_types[i], + framework::TransToProtoVarType(ins[i].dtype()), var_types[i], platform::errors::InvalidArgument( "The data type of fed Variable %s must be %s, but received %s", out_arg_names[i], var_types[i], ins[i].type())); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index fcfdb8b72ecde..944ae2dc62a55 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -265,7 +265,7 @@ void RecurrentOp::RunImpl(const framework::Scope &scope, framework::LoDTensor *dst_tensor) { // create output tensor at begin dst_tensor->Resize(PrependDims(seq_len, src_tensor.dims())); - dst_tensor->mutable_data(place, src_tensor.type()); + dst_tensor->mutable_data(place, src_tensor.dtype()); auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1); // Explicit copy output since the local RNN scope can be destroyed @@ -438,7 +438,8 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, auto &inside_tensor = cur_scope.FindVar(inside_grad_name)->Get(); framework::AttributeMap attrs; - attrs["dtype"] = inside_tensor.type(); + attrs["dtype"] = + framework::TransToProtoVarType(inside_tensor.dtype()); attrs["shape"] = framework::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; @@ -474,7 +475,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, } // Alloc outside memory outside->Resize(PrependDims(seq_len, 
inside.dims())); - outside->mutable_data(place, inside.type()); + outside->mutable_data(place, inside.dtype()); auto dst = outside->Slice(seq_offset, seq_offset + 1); framework::TensorCopy(inside, place, dev_ctx, &dst); @@ -492,7 +493,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, [&](const framework::LoDTensor &inside, framework::LoDTensor *outside) { outside->Resize(inside.dims()); - outside->mutable_data(place, inside.type()); + outside->mutable_data(place, inside.dtype()); framework::TensorCopy(inside, place, dev_ctx, outside); }, true /*is_backward*/); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h index ad7defd22d056..5dcb81c75407f 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { @@ -21,11 +23,10 @@ using paddle::framework::LoDTensor; using paddle::framework::Tensor; using platform::to_void_cast; -inline std::vector CalculateReducedDims(const Tensor* input, - const Tensor* output, - std::vector& reduce_dims, - bool reduce_all, - bool keep_dim) { +inline std::vector CalculateReducedDims( + const Tensor* input, const Tensor* output, + std::vector& reduce_dims, // NOLINT + bool reduce_all, bool keep_dim) { if (keep_dim) return framework::vectorize(output->dims()); if (reduce_all) @@ -69,10 +70,11 @@ class ReduceMKLDNNKernel : public framework::OpKernel { // In that case reorder must be executed to maintain compatibility with // PaddlePaddle reduce op if (input_dims == output_dims) { - dnnl::memory::data_type input_type = - framework::ToMKLDNNDataType(input->type()); - platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(), - input_type, onednn_engine); + dnnl::memory::data_type input_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + input_dims, framework::TransToProtoVarType(input->dtype()), + input_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( input->format(), platform::to_void_cast(input->data())); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc index 7ad048cfe12cd..f99b72faba4ae 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc @@ -36,12 +36,12 @@ class ReduceMaxNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -75,7 +75,8 @@ class ReduceMaxNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - if (x->type() == framework::proto::VarType::INT64) { + if 
(framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { auto op_func = [](const std::vector& inputs, const std::vector& outputs, const NPUAttributeMap& attrs, @@ -94,7 +95,7 @@ class ReduceMaxNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc index ef7e9940f0590..a7ea28314fa79 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc @@ -46,10 +46,12 @@ class ReduceMeanMLUKernel : public framework::OpKernel { } } - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnlReduceDesc reduction_desc( reduce_dims, CNNL_REDUCE_AVG, ToCnnlDataType(), @@ -89,7 +91,8 @@ class ReduceMeanGradMLUKernel : public framework::OpKernel { reduce_numel *= input_dims[d]; } - Tensor tmp_output_grad(output_grad->type()); + Tensor tmp_output_grad( + framework::TransToProtoVarType(output_grad->dtype())); auto tmp_output_dims = input_dims; for (auto d : reduce_dims) { tmp_output_dims[d] = 1; @@ -97,10 +100,13 @@ class ReduceMeanGradMLUKernel : public framework::OpKernel { tmp_output_grad.ShareDataWith(*output_grad); tmp_output_grad.Resize(framework::make_ddim(tmp_output_dims)); - MLUCnnlTensorDesc output_grad_desc(tmp_output_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(tmp_output_grad.type())); - MLUCnnlTensorDesc input_grad_desc(*input_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input_grad->type())); + MLUCnnlTensorDesc output_grad_desc( + tmp_output_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType( + framework::TransToProtoVarType(tmp_output_grad.dtype()))); + MLUCnnlTensorDesc input_grad_desc( + *input_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input_grad->dtype()))); auto value = static_cast(1.0 / static_cast(reduce_numel)); MLUCnnl::Fill(context, value, input_grad_desc.get(), diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc index d9a62ce4dc97d..96ad2e448f58a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc @@ -36,12 +36,12 @@ class ReduceMinNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->type()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -75,7 +75,7 @@ class ReduceMinNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - if 
(x->type() == framework::proto::VarType::INT64) { + if (x->dtype() == experimental::DataType::INT64) { auto op_func = [](const std::vector& inputs, const std::vector& outputs, const NPUAttributeMap& attrs, @@ -94,7 +94,7 @@ class ReduceMinNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->type()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 4101c8b73e7a0..fa322bba8a0f8 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -25,6 +25,7 @@ limitations under the License. */ #include "paddle/pten/kernels/funcs/math_function.h" // only can include the headers in paddle/pten/api dirs +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/kernels/cpu/reduce.h" @@ -242,8 +243,8 @@ class ReduceKernel : public framework::OpKernel { auto* input = context.Input("X"); if (out_dtype < 0) { - cast_out_dtype = - static_cast(input->type()); + cast_out_dtype = static_cast( + framework::TransToProtoVarType(input->dtype())); } else { cast_out_dtype = static_cast(out_dtype); } @@ -261,7 +262,7 @@ class ReduceKernel : public framework::OpKernel { static_cast::TYPE&>(dev_ctx), *input, reduce_all, tmp_dims, keep_dim, - pten::TransToPtenDataType(cast_out_dtype), output); + framework::TransToPtenDataType(cast_out_dtype), output); } }; template @@ -438,8 +439,9 @@ class ReduceGradKernel : public framework::OpKernel { if (in_dtype >= 0) { Tensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); - auto in_kernel_type = - framework::OpKernelType(pre_input->type(), context.GetPlace()); + auto in_kernel_type = framework::OpKernelType( + framework::TransToProtoVarType(pre_input->dtype()), + context.GetPlace()); auto out_kernel_type = framework::OpKernelType( static_cast(in_dtype), context.GetPlace()); @@ -681,25 +683,20 @@ class ReduceCudaKernel : public framework::OpKernel { const Tensor* input = context.Input("X"); Tensor* output = context.Output("Out"); auto out_dtype = context.Attr("out_dtype"); + auto pt_out_dtype = paddle::framework::TransToPtenDataType( + static_cast(out_dtype)); std::vector dims = context.Attr>("dim"); auto& dev_ctx = context.cuda_device_context(); if (out_dtype >= 0) { - output->mutable_data( - dev_ctx.GetPlace(), - static_cast(out_dtype)); + output->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); } else { - output->mutable_data( - dev_ctx.GetPlace(), - static_cast(input->type())); + output->mutable_data(dev_ctx.GetPlace(), input->dtype()); } std::vector dims_int64{dims.begin(), dims.end()}; - auto pt_out_dtype = pten::TransToPtenDataType( - static_cast(out_dtype)); - pten::Reduce( dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output); } @@ -716,6 +713,8 @@ class ReduceCudaGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Out")); auto* d_x = context.Output(framework::GradVarName("X")); auto out_dtype = context.Attr("in_dtype"); + auto pt_out_dtype = framework::TransToPtenDataType( + static_cast(out_dtype)); // get reduce_dim and reduce_num for reduce_mean_grad int dim_size = in_x->dims().size(); std::vector reduce_dims = GetReduceDim(dims, dim_size, reduce_all); @@ -731,21 +730,14 @@ class 
ReduceCudaGradKernel : public framework::OpKernel { new_d_out.Resize(paddle::framework::make_ddim(update_dims)); auto& dev_ctx = context.cuda_device_context(); if (out_dtype > 0) { - d_x->mutable_data( - dev_ctx.GetPlace(), - static_cast(out_dtype)); + d_x->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); } else { - d_x->mutable_data( - dev_ctx.GetPlace(), - static_cast(d_out->type())); + d_x->mutable_data(dev_ctx.GetPlace(), d_out->dtype()); } auto pt_d_out = paddle::experimental::MakePtenDenseTensor(new_d_out); auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x); - auto pt_out_dtype = pten::TransToPtenDataType( - static_cast(out_dtype)); if (out_dtype <= 0) { - pt_out_dtype = pten::TransToPtenDataType( - static_cast(d_out->type())); + pt_out_dtype = d_out->dtype(); } using MPType = typename kps::details::MPTypeTrait::Type; pten::ReduceGrad>( diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc index 9de50b96d2727..4e24d07ee09e4 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc @@ -36,12 +36,12 @@ class ReduceProdNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -81,7 +81,7 @@ class ReduceProdNPUKernel : public framework::OpKernel { NpuOpRunner("ReduceProdD", {*x}, {cast_out}, attr_input); runner.Run(stream); - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h index 79b3480afbcd7..fc3dba106ac93 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h @@ -79,8 +79,9 @@ class ReduceSumGradKernel : public framework::OpKernel { if (in_dtype >= 0) { Tensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); - auto in_kernel_type = - framework::OpKernelType(pre_input->type(), context.GetPlace()); + auto in_kernel_type = framework::OpKernelType( + framework::TransToProtoVarType(pre_input->dtype()), + context.GetPlace()); auto out_kernel_type = framework::OpKernelType( static_cast(in_dtype), context.GetPlace()); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc index d68de5eaa728d..257c73b5ff49b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc @@ -46,8 +46,10 @@ class ReduceSumNPUKernel : public framework::OpKernel { framework::Tensor cast_x; framework::Tensor cast_out; // NOTE: ReduceSumD only supports fp32 and fp16 - if (x->type() != framework::proto::VarType::FP32 && - x->type() != framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP32 && + 
framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP16) { cast_x.Resize(x->dims()); cast_x.mutable_data(ctx.GetPlace()); auto dst_dtype = ConvertToNpuDtype(framework::proto::VarType::FP32); @@ -80,9 +82,12 @@ class ReduceSumNPUKernel : public framework::OpKernel { runner.Run(stream); } - if (x->type() != framework::proto::VarType::FP32 && - x->type() != framework::proto::VarType::FP16) { - auto dst_dtype = ConvertToNpuDtype(out->type()); + if (framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP32 && + framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP16) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(out->dtype())); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/repeat_interleave_op.cu b/paddle/fluid/operators/repeat_interleave_op.cu index 8acb4f216ea39..88f7619259b5c 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cu +++ b/paddle/fluid/operators/repeat_interleave_op.cu @@ -99,7 +99,8 @@ class RepeatInterleaveCUDAKernel : public framework::OpKernel { "But received: [%s], required: [%d].", repeats_tensor->dims()[0], in->dims()[dim])); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ( @@ -215,7 +216,8 @@ class RepeatInterleaveGradCUDAKernel : public framework::OpKernel { auto repeats_tensor = context.Input("RepeatsTensor"); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/repeat_interleave_op.h b/paddle/fluid/operators/repeat_interleave_op.h index ca861696d719e..4e47226358aa9 100644 --- a/paddle/fluid/operators/repeat_interleave_op.h +++ b/paddle/fluid/operators/repeat_interleave_op.h @@ -78,7 +78,8 @@ class RepeatInterleaveKernel : public framework::OpKernel { "But received: [%s], required: [%d].", repeats_tensor->dims()[0], inputs.dims()[dim])); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -149,7 +150,8 @@ class RepeatInterleaveGradKernel : public framework::OpKernel { if (context.HasInput("RepeatsTensor")) { auto repeats_tensor = context.Input("RepeatsTensor"); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc index 5d6876465c8f6..06c3b3fe76f3a 100644 --- a/paddle/fluid/operators/rnn_memory_helper_op.cc +++ b/paddle/fluid/operators/rnn_memory_helper_op.cc @@ -120,7 +120,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto &in_var_tensor = in_var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = in_var_tensor.type(); + 
attrs["dtype"] = framework::TransToProtoVarType(in_var_tensor.dtype()); attrs["shape"] = framework::vectorize(in_var_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc index 7e19287d42565..efe13251e6fa8 100644 --- a/paddle/fluid/operators/roi_align_op_npu.cc +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -53,7 +53,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { int dtype = static_cast(ConvertToNpuDtype(framework::proto::VarType::FP32)); framework::NPUAttributeMap attr_cast = {{"dst_type", dtype}}; - Tensor ROIsNum_fp(ROIs->type()); + Tensor ROIsNum_fp(ROIs->dtype()); ROIsNum_fp.Resize(framework::make_ddim({ROIs->dims()[0], 1})); ROIsNum_fp.mutable_data(ctx.GetPlace()); @@ -67,7 +67,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { x_list.push_back(*ROIs); auto axis = 1; // output of concate - Tensor ROIs_N5(ROIs->type()); + Tensor ROIs_N5(ROIs->dtype()); ROIs_N5.Resize(framework::make_ddim({ROIs->dims()[0], 5})); ROIs_N5.mutable_data(ctx.GetPlace()); @@ -128,7 +128,8 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { platform::errors::NotFound("Input(RoisNum) of ROIAlignGradOp " "is not found while using NPU.")); PADDLE_ENFORCE_EQ( - rois->type(), framework::proto::VarType::FP32, + framework::TransToProtoVarType(rois->dtype()), + framework::proto::VarType::FP32, platform::errors::InvalidArgument( "ROIAlignGradNPU only support ROIs type equaled to FP32.")); diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index 0aa39c9af5c17..364a490536bd6 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -85,7 +86,7 @@ class SaveCombineOpKernel : public framework::OpKernel { inp_var_names[i])); // Serialize tensors one by one // Check types to see if a fp16 transformation is required - auto in_dtype = tensor.type(); + auto in_dtype = framework::TransToProtoVarType(tensor.dtype()); auto out_dtype = save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; diff --git a/paddle/fluid/operators/save_op.h b/paddle/fluid/operators/save_op.h index 2a61d7ce0c25b..bdabd3ba7ef27 100644 --- a/paddle/fluid/operators/save_op.h +++ b/paddle/fluid/operators/save_op.h @@ -84,7 +84,7 @@ class SaveOpKernel : public framework::OpKernel { "Cannot open %s to save variables.", filename)); auto save_as_fp16 = ctx.Attr("save_as_fp16"); - auto in_dtype = tensor.type(); + auto in_dtype = framework::TransToProtoVarType(tensor.dtype()); auto out_dtype = save_as_fp16 ? 
framework::proto::VarType::FP16 : in_dtype; if (in_dtype != out_dtype) { diff --git a/paddle/fluid/operators/scale_op_mlu.cc b/paddle/fluid/operators/scale_op_mlu.cc index d027ac0d3317f..fa222ecae1c93 100644 --- a/paddle/fluid/operators/scale_op_mlu.cc +++ b/paddle/fluid/operators/scale_op_mlu.cc @@ -32,12 +32,14 @@ class ScaleMLUKernel : public framework::OpKernel { if (ctx.HasInput("ScaleTensor")) { framework::Tensor float_scale_tensor = *ctx.Input("ScaleTensor"); - if (float_scale_tensor.type() != in->type()) { + if (framework::TransToProtoVarType(float_scale_tensor.dtype()) != + framework::TransToProtoVarType(in->dtype())) { scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc float_scale_desc(float_scale_tensor); MLUCnnlTensorDesc final_scale_desc(scale_tensor); - cnnlCastDataType_t cast_type = - GetCastDataType(float_scale_tensor.type(), scale_tensor.type()); + cnnlCastDataType_t cast_type = GetCastDataType( + framework::TransToProtoVarType(float_scale_tensor.dtype()), + framework::TransToProtoVarType(scale_tensor.dtype())); MLUCnnl::Cast(ctx, cast_type, float_scale_desc.get(), GetBasePtr(&float_scale_tensor), final_scale_desc.get(), GetBasePtr(&scale_tensor)); @@ -83,13 +85,15 @@ class ScaleMLUKernel : public framework::OpKernel { ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc new_bias_desc(new_bias_tensor); - MLUCnnlOpTensorDesc mul_op_desc(CNNL_OP_TENSOR_MUL, - ToCnnlDataType(in->type()), - CNNL_NOT_PROPAGATE_NAN); + MLUCnnlOpTensorDesc mul_op_desc( + CNNL_OP_TENSOR_MUL, + ToCnnlDataType(framework::TransToProtoVarType(in->dtype())), + CNNL_NOT_PROPAGATE_NAN); MLUCnnl::OpTensor( ctx, mul_op_desc.get(), scale_desc.get(), GetBasePtr(&scale_tensor), bias_desc.get(), GetBasePtr(&bias_tensor), new_bias_desc.get(), - GetBasePtr(&new_bias_tensor), ToCnnlDataType(in->type())); + GetBasePtr(&new_bias_tensor), + ToCnnlDataType(framework::TransToProtoVarType(in->dtype()))); MLUCnnl::Scale(ctx, axis, input_desc.get(), GetBasePtr(in), scale_desc.get(), GetBasePtr(&scale_tensor), new_bias_desc.get(), GetBasePtr(&new_bias_tensor), diff --git a/paddle/fluid/operators/scale_op_npu.cc b/paddle/fluid/operators/scale_op_npu.cc index 807ad7509e573..8ff565d4e7870 100644 --- a/paddle/fluid/operators/scale_op_npu.cc +++ b/paddle/fluid/operators/scale_op_npu.cc @@ -79,11 +79,13 @@ class ScaleNPUKernel : public framework::OpKernel { adds_runner.Run(dev_ctx.stream()); }; - if (x->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner::TypeAdapter({*x}, {*out}, attrs, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); - } else if (x->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter({*x}, {*out}, attrs, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); diff --git a/paddle/fluid/operators/scatter_nd_add_op.cc b/paddle/fluid/operators/scatter_nd_add_op.cc index 2d23e81717abb..fa9610b062a1f 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cc +++ b/paddle/fluid/operators/scatter_nd_add_op.cc @@ -98,8 +98,9 @@ class ScatterNdAddOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "Updates"), platform::errors::InvalidArgument( "Ref and Updates must have same type")); - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + return 
framework::OpKernelType( + framework::TransToProtoVarType(ctx.Input("X")->type()), + ctx.device_context()); } }; diff --git a/paddle/fluid/operators/scatter_nd_add_op.cu b/paddle/fluid/operators/scatter_nd_add_op.cu index ec2a0201de63d..6448f8cc4056d 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cu +++ b/paddle/fluid/operators/scatter_nd_add_op.cu @@ -33,7 +33,7 @@ class ScatterNdAddOpCUDAKernel : public framework::OpKernel { auto *Out = ctx.Output("Out"); framework::TensorCopySync(*X, ctx.GetPlace(), Out); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -70,7 +70,7 @@ class ScatterNdAddGradOpCUDAKernel : public framework::OpKernel { if (dUpdates) { dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); if (index_type == framework::proto::VarType::INT32) { GPUGatherNd(ctx, *dOut, *Ids, dUpdates); } else { diff --git a/paddle/fluid/operators/scatter_nd_add_op.h b/paddle/fluid/operators/scatter_nd_add_op.h index 904b8a421d015..2bdf9ec58a850 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.h +++ b/paddle/fluid/operators/scatter_nd_add_op.h @@ -37,7 +37,7 @@ class ScatterNdAddOpKernel : public framework::OpKernel { // In place output: Out = X framework::TensorCopySync(*X, ctx.GetPlace(), Out); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -76,7 +76,7 @@ class ScatterNdAddGradientOpKernel : public framework::OpKernel { if (dUpdates) { dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather: dUpdates = dO[Ids] - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); if (index_type == framework::proto::VarType::INT32) { CPUGatherNd(ctx.device_context(), *dOut, *Ids, dUpdates); } else { diff --git a/paddle/fluid/operators/scatter_op.cu b/paddle/fluid/operators/scatter_op.cu index c8dae35822544..549e30803b464 100644 --- a/paddle/fluid/operators/scatter_op.cu +++ b/paddle/fluid/operators/scatter_op.cu @@ -35,7 +35,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel { framework::TensorCopy(*X, ctx.GetPlace(), Out); // use template class to support int32_t and int64_t - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -68,7 +68,7 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel { auto *Ids = ctx.Input("Ids"); auto *dOut = ctx.Input(framework::GradVarName("Out")); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/scatter_op.h b/paddle/fluid/operators/scatter_op.h index 185398bed10ea..69ab6c7135cd5 100644 --- a/paddle/fluid/operators/scatter_op.h +++ b/paddle/fluid/operators/scatter_op.h @@ -39,7 
+39,7 @@ class ScatterOpKernel : public framework::OpKernel { // In place output: Out = X, Out[Ids] = Updates framework::TensorCopy(*X, ctx.GetPlace(), Out); // Apply ScatterUpdate: Out[index] = Updates[:] - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -79,7 +79,7 @@ class ScatterGradientOpKernel : public framework::OpKernel { auto *Ids = ctx.Input("Ids"); auto *dOut = ctx.Input(framework::GradVarName("Out")); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc index 8d92ea4166513..3d2563c584af2 100644 --- a/paddle/fluid/operators/scatter_op_npu.cc +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -68,7 +68,8 @@ class ScatterNPUKernel : public framework::OpKernel { }; if (overwrite) { - if (x->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter( {*x, *index, *updates}, {*out}, {}, dev_ctx, op_func_update, {framework::proto::VarType::INT32, framework::proto::VarType::INT32, @@ -80,7 +81,8 @@ class ScatterNPUKernel : public framework::OpKernel { runner_update.Run(dev_ctx.stream()); } } else { - if (x->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter( {*x, *index, *updates}, {*out}, {}, dev_ctx, op_func_add, {framework::proto::VarType::INT32, framework::proto::VarType::INT32, diff --git a/paddle/fluid/operators/scatter_op_xpu.cc b/paddle/fluid/operators/scatter_op_xpu.cc index fadf063bc5bd6..9eeebf8420b83 100644 --- a/paddle/fluid/operators/scatter_op_xpu.cc +++ b/paddle/fluid/operators/scatter_op_xpu.cc @@ -36,7 +36,7 @@ class ScatterOpXPUKernel : public framework::OpKernel { // In place output: Out = X, Out[ids] = Updates framework::TensorCopy(*x, ctx.GetPlace(), out); // Apply ScatterUpdate: Out[index] = Updates[:] - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/searchsorted_op.h b/paddle/fluid/operators/searchsorted_op.h index 9eec755293199..f421bdd3cb712 100644 --- a/paddle/fluid/operators/searchsorted_op.h +++ b/paddle/fluid/operators/searchsorted_op.h @@ -180,12 +180,12 @@ class SearchSortedKernel : public framework::OpKernel { int* out_data = out->mutable_data(context.GetPlace()); SearchSortedFunctor functor( context, sorted_sequence, value, right, out_data); - VisitDataType(value->type(), functor); + VisitDataType(framework::TransToProtoVarType(value->dtype()), functor); } else { int64_t* out_data = out->mutable_data(context.GetPlace()); SearchSortedFunctor functor( context, sorted_sequence, value, right, out_data); - VisitDataType(value->type(), functor); + VisitDataType(framework::TransToProtoVarType(value->dtype()), functor); } } }; diff --git 
a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h index 47b18e04e4dcc..fe89ef7212f0c 100644 --- a/paddle/fluid/operators/segment_pool_op.h +++ b/paddle/fluid/operators/segment_pool_op.h @@ -122,7 +122,7 @@ class SegmentPoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* segment = context.Input("SegmentIds"); - auto index_type = segment->type(); + auto index_type = framework::TransToProtoVarType(segment->dtype()); if (index_type == framework::proto::VarType::INT32) { SegmentKernelLaunchHelper(context); } else if (index_type == framework::proto::VarType::INT64) { @@ -156,7 +156,7 @@ class SegmentPoolGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, in_g, static_cast(0)); - auto index_type = segment->type(); + auto index_type = framework::TransToProtoVarType(segment->dtype()); if (index_type == framework::proto::VarType::INT32) { SegmentPoolGradFunctor pool; pool(context.template device_context(), *input, *output, diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc index 675ea175a16a9..ee108eae53e08 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc @@ -59,9 +59,10 @@ class SequenceMaskNPUKernel : public framework::OpKernel { Tensor cast_x; cast_x.mutable_data(x->dims(), ctx.GetPlace()); - const auto& cast1_runner = - NpuOpRunner("Cast", {*x}, {cast_x}, - {{"dst_type", ConvertToNpuDtype(cast_x.type())}}); + const auto& cast1_runner = NpuOpRunner( + "Cast", {*x}, {cast_x}, + {{"dst_type", + ConvertToNpuDtype(framework::TransToProtoVarType(cast_x.dtype()))}}); cast1_runner.Run(dev_ctx.stream()); Tensor tmp; diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 633bc468dc44e..d9dc4ad0a3aba 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -18,6 +18,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -176,7 +177,6 @@ class SetValueKernel : public framework::OpKernel { auto decrease_axes = ctx.Attr>("decrease_axes"); auto none_axes = ctx.Attr>("none_axes"); - auto dtype = in->type(); if (!starts_tensor_list.empty()) { starts = GetDataFromTensorList(starts_tensor_list); } @@ -235,7 +235,7 @@ class SetValueKernel : public framework::OpKernel { // set_value is what we want. 
paddle::framework::TensorCopy(*in, place, out); - Tensor slice_tensor(dtype), pad_tensor(dtype); + Tensor slice_tensor(in->dtype()), pad_tensor(in->dtype()); slice_tensor.mutable_data(slice_dims, place); pad_tensor.mutable_data(in_dims, place); @@ -292,12 +292,13 @@ class SetValueKernel : public framework::OpKernel { ElementwiseComputeEx, DeviceContext, T>( ctx, &slice_tensor, value_tensor, -1, SubFunctor(), &slice_tensor); } else { - Tensor value_t(dtype); + Tensor value_t(in->dtype()); auto value_dims = framework::make_ddim(shape); CheckIsDimsMatch(slice_dims_for_assign, value_dims); value_t.mutable_data(value_dims, place); - auto value_name = GetValueName(dtype); + auto value_name = + GetValueName(framework::TransToProtoVarType(in->dtype())); CopyVecotorToTensor(value_name.c_str(), &value_t, ctx); value_t.Resize(value_dims); ElementwiseComputeEx, DeviceContext, T>( @@ -447,7 +448,7 @@ class SetValueGradKernel : public framework::OpKernel { framework::EigenTensor::From(*grad_input); - framework::Tensor tmp(grad_input->type()); + framework::Tensor tmp(grad_input->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { framework::EigenTensor::From(*grad_value); if (need_reverse) { - framework::Tensor tmp(grad_value->type()); + framework::Tensor tmp(grad_value->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { framework::EigenTensor:: From(*grad_value, fake_grad_value_dims); - framework::Tensor tmp(grad_value->type()); + framework::Tensor tmp(grad_value->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { tmp_t.slice(framework::EigenDim::From(offset), extent); } if (need_reverse) { - framework::Tensor tmp_value(grad_value->type()); + framework::Tensor tmp_value(grad_value->dtype()); tmp_value.mutable_data(fake_grad_value_dims, context.GetPlace()); auto tmp_value_t = framework::EigenTensor { CheckIsDimsMatch(slice_dims_for_assign, value_dims); value_t.mutable_data(value_dims, ctx.GetPlace()); - auto value_name = GetValueName(in->type()); + auto value_name = + GetValueName(framework::TransToProtoVarType(in->dtype())); CopyVecotorToTensor(value_name.c_str(), &value_t, ctx); value_t.Resize(value_dims); } diff --git a/paddle/fluid/operators/shard_index_op_npu.cc b/paddle/fluid/operators/shard_index_op_npu.cc index a19deb160dbd8..dc2e8ad58f31c 100644 --- a/paddle/fluid/operators/shard_index_op_npu.cc +++ b/paddle/fluid/operators/shard_index_op_npu.cc @@ -67,7 +67,7 @@ class ShardIndexNPUKernel : public framework::OpKernel { tmp.mutable_data(framework::DDim({1}), place); FillNpuTensorWithConstant(&tmp, shard_size); - Tensor condition(framework::proto::VarType::BOOL); + Tensor condition(experimental::DataType::BOOL); condition.mutable_data(in->dims(), place); Tensor tmp2(in->type()); diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 38721e5e3e5bd..a15c2f3d96c16 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -79,7 +79,7 @@ class ShrinkRNNMemoryOp : public ArrayOp { } if (dst_num_rows != 0) { - out_tensor.mutable_data(place, x_tensor.type()); + out_tensor.mutable_data(place, x_tensor.dtype()); auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); framework::TensorCopy(x_tensor.Slice(0, height), place, 
*dev_ctx, &out_tensor); @@ -149,7 +149,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto &x_tensor = x_var->Get(); auto &dx_tensor = *dx_var->GetMutable(); dx_tensor.Resize(x_tensor.dims()); - dx_tensor.mutable_data(x_tensor.place(), x_tensor.type()); + dx_tensor.mutable_data(x_tensor.place(), x_tensor.dtype()); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 37c79b9898e9d..a3d064a8e6dbf 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -130,7 +130,9 @@ class SliceOp : public framework::OperatorWithKernel { "The tensor Input (Input) of Slice op is not initialized.")); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(in_tensor.place())) { - return framework::OpKernelType(in_tensor.type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor.dtype()), + ctx.device_context()); } #ifdef PADDLE_WITH_MKLDNN @@ -152,7 +154,8 @@ class SliceOp : public framework::OperatorWithKernel { } #endif - return framework::OpKernelType(in_tensor.type(), in_tensor.place()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor.dtype()), in_tensor.place()); } return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc index a05ab9056b5cd..73da0e4756b1c 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc @@ -42,12 +42,12 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { const auto& runner1 = NpuOpRunner("Sub", {*in_x, *in_y}, {*out_diff}, {}); runner1.Run(stream); - Tensor no_reduce_loss(in_x->type()); + Tensor no_reduce_loss(in_x->dtype()); no_reduce_loss.Resize(in_x->dims()); no_reduce_loss.mutable_data(context.GetPlace()); // multiply inside weight before get the loss if (has_weight) { - Tensor tmp_diff(out_diff->type()); + Tensor tmp_diff(out_diff->dtype()); tmp_diff.Resize(out_diff->dims()); tmp_diff.mutable_data(context.GetPlace()); const auto& runner2 = @@ -58,11 +58,11 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { context.template device_context(), out_diff); - Tensor tmp_x(in_x->type()); + Tensor tmp_x(in_x->dtype()); tmp_x.Resize(in_x->dims()); tmp_x.mutable_data(context.GetPlace()); - Tensor tmp_y(in_y->type()); + Tensor tmp_y(in_y->dtype()); tmp_y.Resize(in_y->dims()); tmp_y.mutable_data(context.GetPlace()); @@ -85,7 +85,7 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { // multiply outside weight and loss // reduceSum because the output'shape must be [B,1] if (has_weight) { - Tensor tmp_loss(no_reduce_loss.type()); + Tensor tmp_loss(no_reduce_loss.dtype()); tmp_loss.Resize(no_reduce_loss.dims()); tmp_loss.mutable_data(context.GetPlace()); const auto& runner4 = @@ -123,13 +123,13 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { .stream(); // diff == in_x - in_y == diff - 0 - Tensor tmp_zero(diff->type()); + Tensor tmp_zero(diff->dtype()); tmp_zero.Resize(diff->dims()); tmp_zero.mutable_data(context.GetPlace()); const auto& runner_zero = NpuOpRunner("ZerosLike", {*diff}, {tmp_zero}, {}); runner_zero.Run(stream); - Tensor grad(diff->type()); + Tensor grad(diff->dtype()); grad.Resize(diff->dims()); 
grad.mutable_data(context.GetPlace()); // broadcast og(output_grad) to adapt to the npu interface @@ -138,7 +138,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { {{"shape", framework::vectorize(diff->dims())}}); runner_broad.Run(stream); - Tensor gradient(diff->type()); + Tensor gradient(diff->dtype()); gradient.Resize(diff->dims()); gradient.mutable_data(context.GetPlace()); // diff == diff - 0 == in_x - in_y @@ -149,14 +149,14 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // mul weight and gradient if (has_weight) { - Tensor weight(inside_weight->type()); + Tensor weight(inside_weight->dtype()); weight.Resize(inside_weight->dims()); weight.mutable_data(context.GetPlace()); const auto& runner_weight = NpuOpRunner("Mul", {*inside_weight, *outside_weight}, {weight}, {}); runner_weight.Run(stream); - Tensor tmp_grad(gradient.type()); + Tensor tmp_grad(gradient.dtype()); tmp_grad.Resize(gradient.dims()); tmp_grad.mutable_data(context.GetPlace()); const auto& runner_weight_grad = @@ -180,7 +180,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // outy_grad = - gradient if (outy_grad) { outy_grad->mutable_data(context.GetPlace()); - Tensor coeff(framework::proto::VarType::FP32); + Tensor coeff(experimental::DataType::FLOAT32); coeff.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&coeff, -1); const auto& runner_y_grad = diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index ef8b0bbdf9d32..a7f88dd0ec38e 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/cross_entropy.h" @@ -54,7 +55,7 @@ static void RunSoftmaxWithCrossEntropyFunctor( const bool soft_label = context.Attr("soft_label"); SoftmaxWithCrossEntropyFunctor functor(context, *labels, soft_label, visitor); - auto dtype = labels->type(); + auto dtype = framework::TransToProtoVarType(labels->dtype()); if (soft_label) { PADDLE_ENFORCE_EQ( dtype, framework::DataTypeTrait::DataType(), diff --git a/paddle/fluid/operators/solve_op.h b/paddle/fluid/operators/solve_op.h index c023d33a444cf..f75dd6243a2ee 100644 --- a/paddle/fluid/operators/solve_op.h +++ b/paddle/fluid/operators/solve_op.h @@ -327,11 +327,11 @@ static void linalg_solve(const framework::ExecutionContext& context, Tensor tmp_y; if (is_vector) { - tmp_y.mutable_data(context.GetPlace(), y->type()); + tmp_y.mutable_data(context.GetPlace(), y->dtype()); to_unsqueeze(context, *y, &tmp_y); } else { tmp_y.Resize(y->dims()); - tmp_y.mutable_data(context.GetPlace(), y->type()); + tmp_y.mutable_data(context.GetPlace(), y->dtype()); framework::TensorCopy( *y, context.GetPlace(), context.template device_context(), &tmp_y); @@ -339,7 +339,7 @@ static void linalg_solve(const framework::ExecutionContext& context, Tensor tmp_x; tmp_x.Resize(x->dims()); - tmp_x.mutable_data(context.GetPlace(), x->type()); + tmp_x.mutable_data(context.GetPlace(), x->dtype()); framework::TensorCopy( *x, context.GetPlace(), context.template device_context(), &tmp_x); @@ -473,11 +473,11 @@ class SolveGradKernel : public framework::OpKernel { Tensor tmp_y; if (is_vector) { - tmp_y.mutable_data(ctx.GetPlace(), y->type()); + tmp_y.mutable_data(ctx.GetPlace(), y->dtype()); to_unsqueeze(ctx, *y, &tmp_y); } else { tmp_y.Resize(y->dims()); - tmp_y.mutable_data(ctx.GetPlace(), y->type()); + tmp_y.mutable_data(ctx.GetPlace(), y->dtype()); framework::TensorCopy( *y, ctx.GetPlace(), ctx.template device_context(), &tmp_y); @@ -485,7 +485,7 @@ class SolveGradKernel : public framework::OpKernel { Tensor tmp_x; tmp_x.Resize(input->dims()); - tmp_x.mutable_data(ctx.GetPlace(), input->type()); + tmp_x.mutable_data(ctx.GetPlace(), input->dtype()); framework::TensorCopy( *input, ctx.GetPlace(), ctx.template device_context(), &tmp_x); @@ -505,7 +505,7 @@ class SolveGradKernel : public framework::OpKernel { tmp_dy.Resize(framework::make_ddim(y_broadcast_dims)); tmp_dy.mutable_data(ctx.GetPlace()); - Tensor tmp_input(input->type()); + Tensor tmp_input(input->dtype()); const auto& new_dims_vec = getNewDimsVec(input->dims()); tmp_input.Resize(framework::make_ddim(new_dims_vec)); tmp_input.mutable_data(ctx.GetPlace()); @@ -530,11 +530,11 @@ class SolveGradKernel : public framework::OpKernel { blas.MatMul(tmp_dy, mat_dim_a1, *out, mat_dim_b1, T(-1), &tmp_dx, T(0)); } else if (is_vector_rhs(*input, *y)) { Tensor tmp_dy_; - tmp_dy_.mutable_data(ctx.GetPlace(), y->type()); + tmp_dy_.mutable_data(ctx.GetPlace(), y->dtype()); to_unsqueeze(ctx, tmp_dy, &tmp_dy_); Tensor tmp_out_; - tmp_out_.mutable_data(ctx.GetPlace(), out->type()); + tmp_out_.mutable_data(ctx.GetPlace(), out->dtype()); to_unsqueeze(ctx, *out, &tmp_out_); auto mat_dim_a1 = @@ -553,7 +553,7 @@ class SolveGradKernel : public framework::OpKernel { if (y->dims() != tmp_dy.dims()) { Tensor dy_help; dy_help.Resize(tmp_dy.dims()); - dy_help.mutable_data(ctx.GetPlace(), tmp_dy.type()); + dy_help.mutable_data(ctx.GetPlace(), tmp_dy.dtype()); 
framework::TensorCopy( tmp_dy, ctx.GetPlace(), ctx.template device_context(), &dy_help); @@ -607,7 +607,7 @@ class SolveGradKernel : public framework::OpKernel { if (input->dims() != tmp_dx.dims()) { Tensor dx_help; dx_help.Resize(tmp_dx.dims()); - dx_help.mutable_data(ctx.GetPlace(), tmp_dx.type()); + dx_help.mutable_data(ctx.GetPlace(), tmp_dx.dtype()); framework::TensorCopy( tmp_dx, ctx.GetPlace(), ctx.template device_context(), &dx_help); diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index b937de1bc8684..5cc7502bb8c44 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -362,7 +362,8 @@ void DotSdd(const platform::CUDADeviceContext& ctx, const Tensor* a, const int* c_columns_data = c_columns->data(); T* c_value_data = c_value->data(); - cudaDataType_t gpu_type = GetGpuType(c_value->type()); + cudaDataType_t gpu_type = + GetGpuType(framework::TransToProtoVarType(c_value->dtype())); cusparseHandle_t handle = nullptr; cusparseDnMatDescr_t mat_a, mat_b; cusparseSpMatDescr_t mat_c; @@ -418,7 +419,8 @@ void DotDsd(const platform::CUDADeviceContext& ctx, const Tensor* a_offset, const T* b_data = b->data(); T* c_data = c->data(); - cudaDataType_t gpu_type = GetGpuType(c->type()); + cudaDataType_t gpu_type = + GetGpuType(framework::TransToProtoVarType(c->dtype())); cusparseHandle_t handle = nullptr; cusparseSpMatDescr_t mat_a; cusparseDnMatDescr_t mat_b, mat_c; diff --git a/paddle/fluid/operators/spectral_op.cc b/paddle/fluid/operators/spectral_op.cc index 64751a21c837d..325bbd99d796b 100644 --- a/paddle/fluid/operators/spectral_op.cc +++ b/paddle/fluid/operators/spectral_op.cc @@ -572,12 +572,14 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out, // make a DFTI_DESCRIPTOR DftiDescriptor desc = - _plan_mkl_fft(x->type(), out->type(), input_stride, output_stride, - signal_sizes, normalization, forward); + _plan_mkl_fft(framework::TransToProtoVarType(x->dtype()), + framework::TransToProtoVarType(out->dtype()), input_stride, + output_stride, signal_sizes, normalization, forward); const FFTTransformType fft_type = GetFFTTransformType(x->type(), out->type()); if (fft_type == FFTTransformType::C2R && forward) { - framework::Tensor collapsed_input_conj(collapsed_input.type()); + framework::Tensor collapsed_input_conj( + framework::TransToProtoVarType(collapsed_input.dtype())); collapsed_input_conj.mutable_data(collapsed_input.dims(), ctx.GetPlace()); // conjugate the input @@ -589,7 +591,8 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out, MKL_DFTI_CHECK(platform::dynload::DftiComputeBackward( desc.get(), collapsed_input_conj.data(), collapsed_output.data())); } else if (fft_type == FFTTransformType::R2C && !forward) { - framework::Tensor collapsed_output_conj(collapsed_output.type()); + framework::Tensor collapsed_output_conj( + framework::TransToProtoVarType(collapsed_output.dtype())); collapsed_output_conj.mutable_data(collapsed_output.dims(), ctx.GetPlace()); MKL_DFTI_CHECK(platform::dynload::DftiComputeForward( diff --git a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index d6a775dd55de8..c932834db39b3 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -73,10 +73,13 @@ FFTConfigKey create_fft_configkey(const framework::Tensor& input, const framework::Tensor& output, int signal_ndim) { // Create the transform plan (either from cache or locally) - const auto 
value_type = framework::IsComplexType(input.type()) - ? framework::ToRealType(input.type()) - : input.type(); - auto fft_type = GetFFTTransformType(input.type(), output.type()); + const auto value_type = + framework::IsComplexType(framework::TransToProtoVarType(input.dtype())) + ? framework::ToRealType(framework::TransToProtoVarType(input.dtype())) + : framework::TransToProtoVarType(input.dtype()); + auto fft_type = + GetFFTTransformType(framework::TransToProtoVarType(input.dtype()), + framework::TransToProtoVarType(output.dtype())); // signal sizes std::vector signal_size(signal_ndim + 1); @@ -137,10 +140,13 @@ FFTConfigKey create_fft_configkey(const framework::Tensor& input, const framework::Tensor& output, int signal_ndim) { // Create the transform plan (either from cache or locally) - const auto value_type = framework::IsComplexType(input.type()) - ? framework::ToRealType(input.type()) - : input.type(); - auto fft_type = GetFFTTransformType(input.type(), output.type()); + const auto value_type = + framework::IsComplexType(framework::TransToProtoVarType(input.dtype())) + ? framework::ToRealType(framework::TransToProtoVarType(input.dtype())) + : framework::TransToProtoVarType(input.dtype()); + auto fft_type = + GetFFTTransformType(framework::TransToProtoVarType(input.dtype()), + framework::TransToProtoVarType(output.dtype())); // signal sizes std::vector signal_size(signal_ndim + 1); diff --git a/paddle/fluid/operators/spectral_op.h b/paddle/fluid/operators/spectral_op.h index e549c4a454b19..bff8bb5707f1c 100644 --- a/paddle/fluid/operators/spectral_op.h +++ b/paddle/fluid/operators/spectral_op.h @@ -15,6 +15,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/eigen.h" @@ -393,8 +394,9 @@ class FFTR2CGradKernel : public framework::OpKernel { fft_c2c_func(dev_ctx, &full_dy, &complex_dx, axes, normalization, !forward); } - framework::TransComplexToReal(dx->type(), complex_dx.type(), complex_dx, - dx); + framework::TransComplexToReal( + framework::TransToProtoVarType(dx->dtype()), + framework::TransToProtoVarType(complex_dx.dtype()), complex_dx, dx); } }; diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc index d6d3f4c99aa05..cf913ffe94dd8 100644 --- a/paddle/fluid/operators/split_op_mlu.cc +++ b/paddle/fluid/operators/split_op_mlu.cc @@ -61,13 +61,15 @@ class SplitMLUKernel : public framework::OpKernel { for (size_t i = 0; i < outs.size(); i++) { outs[i]->mutable_data(ctx.GetPlace()); output_descs.emplace_back(MLUCnnlTensorDesc( - *outs[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(outs[i]->type()))); + *outs[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(outs[i]->dtype())))); desc_vector.push_back(output_descs.back().get()); vct_tensor.push_back(GetBasePtr(outs[i])); } // init in tensors - MLUCnnlTensorDesc input_desc(*in, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(in->type())); + MLUCnnlTensorDesc input_desc( + *in, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(in->dtype()))); // MLU should do sth MLUCnnl::Split(ctx, num_tensor, axis, input_desc.get(), GetBasePtr(in), diff --git a/paddle/fluid/operators/strided_slice_op.cc b/paddle/fluid/operators/strided_slice_op.cc index a1b5ca0f6a6eb..9ed102514afcc 100644 --- a/paddle/fluid/operators/strided_slice_op.cc +++ b/paddle/fluid/operators/strided_slice_op.cc @@ -198,7 +198,9 @@ class StridedSliceOp : public
framework::OperatorWithKernel { // NOTE: cuda pinned tensor need to copy its data to target place auto in_tensor = ctx.Input("Input"); if (platform::is_cuda_pinned_place(in_tensor->place())) { - return framework::OpKernelType(in_tensor->type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor->dtype()), + ctx.device_context()); } return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "Input"), diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 00aab6b75006a..f79563a447142 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -22,6 +22,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -134,9 +135,10 @@ class SumOp : public framework::OperatorWithKernel { continue; } if (dtype == -1) { - dtype = tensor->type(); + dtype = framework::TransToProtoVarType(tensor->dtype()); } else { - PADDLE_ENFORCE_EQ(dtype, tensor->type(), + PADDLE_ENFORCE_EQ(dtype, + framework::TransToProtoVarType(tensor->dtype()), platform::errors::InvalidArgument( "The inputs type of sum op must be same")); } @@ -169,8 +171,9 @@ class SumOp : public framework::OperatorWithKernel { for (auto& var : x_vars) { auto& value = var->Get().value(); if (value.IsInitialized()) { - return framework::OpKernelType(value.type(), ctx.device_context(), - layout, library); + return framework::OpKernelType( + framework::TransToProtoVarType(value.dtype()), + ctx.device_context(), layout, library); } } // if input sparse vars are not initialized, use an default kernel type. @@ -181,8 +184,9 @@ class SumOp : public framework::OperatorWithKernel { auto& array = x_var->Get(); for (auto& each : array) { if (each.numel() != 0 && each.IsInitialized()) { - return framework::OpKernelType(each.type(), ctx.device_context(), - layout, library); + return framework::OpKernelType( + framework::TransToProtoVarType(each.dtype()), + ctx.device_context(), layout, library); } } } diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index 59f3a424479bb..b84ac3c79876a 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -43,13 +43,15 @@ class SumMLUKernel : public framework::OpKernel { std::vector desc_vector; for (int i = 0; i < ins_size; i++) { input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type()))); + *ins[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(ins[i]->dtype())))); desc_vector.push_back(input_descs.back().get()); inputs.push_back(GetBasePtr(ins[i])); } // init out tensors - MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); + MLUCnnlTensorDesc output_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); uint32_t ins_size_t = static_cast(ins_size); MLUCnnl::AddN(ctx, ins_size_t, desc_vector.data(), inputs.data(), output_desc.get(), GetBasePtr(out)); diff --git a/paddle/fluid/operators/sync_batch_norm_op.cu.h b/paddle/fluid/operators/sync_batch_norm_op.cu.h index 201de5ac1a428..17c96544988b6 100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cu.h +++ b/paddle/fluid/operators/sync_batch_norm_op.cu.h @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include namespace cub = hipcub; #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/batch_norm_op.h" @@ -189,7 +190,8 @@ void SyncBatchNormFunctor(const framework::ExecutionContext &ctx, #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto *comm = dev_ctx.nccl_comm(); if (comm) { - int dtype = platform::ToNCCLDataType(mean_out->type()); + int dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(mean_out->dtype())); // In-place operation PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( stats, stats, 2 * C + 1, static_cast(dtype), ncclSum, @@ -463,7 +465,8 @@ void SyncBatchNormGradFunctor( #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto *comm = dev_ctx.nccl_comm(); if (comm) { - int dtype = platform::ToNCCLDataType(scale->type()); + int dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(scale->dtype())); // In-place operation PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( stats, stats, 2 * C + 1, static_cast(dtype), ncclSum, diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc b/paddle/fluid/operators/sync_batch_norm_op_npu.cc index c666d5a11fa31..b5632f4fe4a84 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc @@ -398,7 +398,8 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { float device_counts = 0.0; if (comm) { - HcclDataType dtype = platform::ToHCCLDataType(mean_out->type()); + HcclDataType dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(mean_out->dtype())); Tensor device_count_tensor; { @@ -539,7 +540,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { multiples = {N, H, W, 1}; auto comm = paddle::platform::HCCLCommContext::Instance().Get(0, place); - HcclDataType dtype = platform::ToHCCLDataType(scale->type()); + HcclDataType dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(scale->dtype())); float device_counts = 0.0; if (comm) { @@ -695,7 +697,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_mul_x_sub_mean_for_scale; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { dy_mul_x_sub_mean_for_scale.Resize(x->dims()); dy_mul_x_sub_mean_for_scale.mutable_data(place); const auto &runner = NpuOpRunner("Mul", {*d_y, x_sub_saved_mean}, @@ -712,7 +715,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_mul_x_sub_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { dy_mul_x_sub_mean.Resize(x->dims()); dy_mul_x_sub_mean.mutable_data(place); const auto &runner = NpuOpRunner("Mul", {*d_y, x_sub_saved_mean}, @@ -751,7 +755,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { if (d_x) { Tensor dy_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; dy_mean.Resize({C}); @@ -813,7 +818,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_sub_dy_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + 
framework::proto::VarType::FP16) { dy_sub_dy_mean.Resize(x->dims()); dy_sub_dy_mean.mutable_data(place); const auto &runner = @@ -964,7 +970,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // cacl d_bias if (d_bias) { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; d_bias->mutable_data(place); diff --git a/paddle/fluid/operators/take_along_axis_op.cu b/paddle/fluid/operators/take_along_axis_op.cu index 2d0ebbc20f215..d524d31c461cf 100644 --- a/paddle/fluid/operators/take_along_axis_op.cu +++ b/paddle/fluid/operators/take_along_axis_op.cu @@ -32,7 +32,7 @@ class TakeAlongAxisCUDAKernel : public framework::OpKernel { auto result = ctx.Output("Result"); result->Resize(index->dims()); result->mutable_data(ctx.GetPlace()); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { gpu_gather_kernel(*input, axis, *index, *result, ctx.device_context()); @@ -66,7 +66,7 @@ class TakeAlongAxisGradOpCUDAKernel : public framework::OpKernel { pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { gpu_scatter_add_kernel( diff --git a/paddle/fluid/operators/take_along_axis_op.h b/paddle/fluid/operators/take_along_axis_op.h index e7f804621b3f4..66e2f8898b3a0 100644 --- a/paddle/fluid/operators/take_along_axis_op.h +++ b/paddle/fluid/operators/take_along_axis_op.h @@ -41,7 +41,7 @@ class TakeAlongAxisOpKernel : public framework::OpKernel { result->Resize(index->dims()); result->mutable_data(ctx.GetPlace()); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { cpu_gather_kernel(*input, axis, *index, *result, ctx.device_context()); @@ -76,7 +76,7 @@ class TakeAlongAxisGradOpKernel : public framework::OpKernel { functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { cpu_scatter_add_kernel( *input_grad, axis, *index, *result_grad, diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 3549fc6c45e8d..963dfd3bf7720 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -108,7 +108,8 @@ class TDMChildKernel : public framework::OpKernel { auto *tree_info_var = ctx.InputVar("TreeInfo"); auto &input_tensor = input_var->Get(); - const auto &input_type = input_tensor.type(); + const auto &input_type = + framework::TransToProtoVarType(input_tensor.dtype()); bool input_type_match = input_type == framework::proto::VarType::INT32 || input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, @@ -122,7 +123,8 @@ class TDMChildKernel : public framework::OpKernel { framework::proto::VarType::INT64))); auto &tree_info_tensor = tree_info_var->Get(); - const auto &info_type = tree_info_tensor.type(); + const auto &info_type = + framework::TransToProtoVarType(tree_info_tensor.dtype()); bool info_type_match = 
info_type == framework::proto::VarType::INT32 || info_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index b740f34b0a346..bf752a9c8ad78 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -244,7 +244,8 @@ class TDMSamplerKernel : public framework::OpKernel { auto &travel_lod_tensor = travel_var->Get(); auto &layer_lod_tensor = layer_var->Get(); - const auto &input_type = input_tensor.type(); + const auto &input_type = + framework::TransToProtoVarType(input_tensor.dtype()); bool input_type_match = input_type == framework::proto::VarType::INT32 || input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, @@ -257,7 +258,8 @@ class TDMSamplerKernel : public framework::OpKernel { paddle::framework::DataTypeToString( framework::proto::VarType::INT64))); - const auto &travel_type = travel_lod_tensor.type(); + const auto &travel_type = + framework::TransToProtoVarType(travel_lod_tensor.dtype()); bool travel_type_match = travel_type == framework::proto::VarType::INT32 || travel_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -271,7 +273,8 @@ class TDMSamplerKernel : public framework::OpKernel { paddle::framework::DataTypeToString( framework::proto::VarType::INT64))); - const auto &layer_type = layer_lod_tensor.type(); + const auto &layer_type = + framework::TransToProtoVarType(layer_lod_tensor.dtype()); bool layer_type_match = layer_type == framework::proto::VarType::INT32 || layer_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(layer_type_match, true, diff --git a/paddle/fluid/operators/tensor_formatter.cc b/paddle/fluid/operators/tensor_formatter.cc index b65ebee9b2662..ef46ee25156e5 100644 --- a/paddle/fluid/operators/tensor_formatter.cc +++ b/paddle/fluid/operators/tensor_formatter.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/tensor_formatter.h" #include +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -90,7 +91,8 @@ std::string TensorFormatter::Format(const framework::LoDTensor& print_tensor, << std::endl; } - std::type_index dtype = framework::ToTypeIndex(print_tensor.type()); + std::type_index dtype = framework::ToTypeIndex( + framework::TransToProtoVarType(print_tensor.dtype())); if (print_tensor_type_) { log_stream << " - dtype: " << platform::demangle(dtype.name()) << std::endl; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index f67295a5dbf7c..41d5a010fd55e 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -513,7 +513,7 @@ class TensorRTEngineOp : public framework::OperatorBase { #endif } runtime_batch = t_shape[0]; - auto type = t.type(); + auto type = framework::TransToProtoVarType(t.dtype()); if (type == framework::proto::VarType::FP32) { buffers[bind_index] = static_cast(t.data()); } else if (type == framework::proto::VarType::INT64) { diff --git a/paddle/fluid/operators/top_k_op_npu.cc b/paddle/fluid/operators/top_k_op_npu.cc index bd736f9fc913d..ba7c39e955eee 100644 --- a/paddle/fluid/operators/top_k_op_npu.cc +++ b/paddle/fluid/operators/top_k_op_npu.cc @@ -64,7 +64,7 @@ class TopkNPUKernel : public framework::OpKernel { {"dim", -1}, {"largest", true}}; - Tensor tmp_indices(framework::proto::VarType::INT32); + Tensor tmp_indices(experimental::DataType::INT32); 
tmp_indices.Resize(indices->dims()); tmp_indices.mutable_data(ctx.GetPlace()); @@ -77,7 +77,8 @@ class TopkNPUKernel : public framework::OpKernel { runner.Run(stream); // cast indices from INT32 to INT64 - auto dst_dtype = ConvertToNpuDtype(indices->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(indices->dtype())); const auto& runner_cast_indices = NpuOpRunner("Cast", {tmp_indices}, {*indices}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/top_k_v2_op_npu.cc b/paddle/fluid/operators/top_k_v2_op_npu.cc old mode 100755 new mode 100644 index 843f7620cac44..e11070638834c --- a/paddle/fluid/operators/top_k_v2_op_npu.cc +++ b/paddle/fluid/operators/top_k_v2_op_npu.cc @@ -57,7 +57,7 @@ class TopkV2NPUKernel : public framework::OpKernel { out->mutable_data(context.GetPlace()); indices->mutable_data(context.GetPlace()); - framework::Tensor indices_int32(framework::proto::VarType::INT32); + framework::Tensor indices_int32(experimental::DataType::INT32); indices_int32.Resize(output_dims); indices_int32.mutable_data(context.GetPlace()); @@ -77,7 +77,8 @@ class TopkV2NPUKernel : public framework::OpKernel { .Run(npu_stream); // Cast 'indices_int32' to 'indices', from INT32 to INT64 - auto dst_dtype = ConvertToNpuDtype(indices->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(indices->dtype())); const auto& npu_op_runner_cast = NpuOpRunner("Cast", {indices_int32}, {*indices}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 47c9d1dba91b9..64dd24b4d638a 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -116,7 +116,8 @@ class TransferLayoutFunctor { out->mutable_data(in.place(), in.type()); framework::VisitDataType( - in.type(), framework::CastDataLayout(&dev_ctx, axis, in, out)); + framework::TransToProtoVarType(in.dtype()), + framework::CastDataLayout(&dev_ctx, axis, in, out)); } const framework::Variable *in_; diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 7320c1314c6f3..c15c1ffe93d60 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -250,7 +250,8 @@ class Transpose2Op : public TransposeOp { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; using framework::proto::VarType; - auto input_data_type = ctx.Input("X")->type(); + auto input_data_type = + framework::TransToProtoVarType(ctx.Input("X")->dtype()); customized_type_value = (input_data_type == VarType::INT8 || input_data_type == VarType::UINT8) ?
kTransposeMKLDNNINT8 diff --git a/paddle/fluid/operators/tril_triu_op_npu.cc b/paddle/fluid/operators/tril_triu_op_npu.cc index 02af711567f84..ad1c1814c05cd 100644 --- a/paddle/fluid/operators/tril_triu_op_npu.cc +++ b/paddle/fluid/operators/tril_triu_op_npu.cc @@ -52,7 +52,8 @@ class TrilTriuNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); }; - if (x->type() == framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::BOOL) { if (lower) { NpuOpRunner::TypeAdapter({*x}, {*out}, attr_input, dev_ctx, op_func_tril, diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index f88fefd1c6a78..261d9cee2d5cd 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { @@ -28,29 +29,29 @@ class TruncatedGaussianRandomNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { // TODO(zhiqiu): support dynamic shape and call ParameterizedTruncatedNormal std::vector shape = ctx.Attr>("shape"); - Tensor shape_tensor(framework::proto::VarType::INT32); + Tensor shape_tensor(experimental::DataType::INT32); shape_tensor.mutable_data({static_cast(shape.size())}, ctx.GetPlace()); paddle::framework::TensorFromVector(shape, ctx.device_context(), &shape_tensor); float mean = ctx.Attr("mean"); - Tensor mean_tensor(framework::proto::VarType::FP32); + Tensor mean_tensor(experimental::DataType::FLOAT32); mean_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&mean_tensor, mean); float std = ctx.Attr("std"); - Tensor std_tensor(framework::proto::VarType::FP32); + Tensor std_tensor(experimental::DataType::FLOAT32); std_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&std_tensor, std); int32_t seed_var = ctx.Attr("seed"); - Tensor min_tensor(framework::proto::VarType::FP32); + Tensor min_tensor(experimental::DataType::FLOAT32); min_tensor.mutable_data({1}, ctx.GetPlace()); float min_value = mean - std * 2.0; FillNpuTensorWithConstant(&min_tensor, min_value); - Tensor max_tensor(framework::proto::VarType::FP32); + Tensor max_tensor(experimental::DataType::FLOAT32); max_tensor.mutable_data({1}, ctx.GetPlace()); float max_value = mean + std * 2.0; FillNpuTensorWithConstant(&max_tensor, max_value); @@ -80,7 +81,7 @@ class NPUTruncatedGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::uniform_real_distribution dist(std::numeric_limits::min(), diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index f4ae8d8269056..adafbd1fd484e 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -25,7 +25,8 @@ using Tensor = framework::Tensor; inline std::vector GetNewDataFromShapeTensor( const Tensor* new_data_tensor) { - if (new_data_tensor->type() == framework::proto::VarType::INT64) { + if 
(framework::TransToProtoVarType(new_data_tensor->dtype()) == + framework::proto::VarType::INT64) { auto* new_data = new_data_tensor->data(); framework::Tensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { @@ -36,7 +37,8 @@ inline std::vector GetNewDataFromShapeTensor( std::vector vec_new_data(new_data, new_data + new_data_tensor->numel()); return vec_new_data; - } else if (new_data_tensor->type() == framework::proto::VarType::INT32) { + } else if (framework::TransToProtoVarType(new_data_tensor->dtype()) == + framework::proto::VarType::INT32) { auto* new_data = new_data_tensor->data(); std::vector vec_new_data; framework::Tensor cpu_starts_tensor; @@ -53,7 +55,7 @@ inline std::vector GetNewDataFromShapeTensor( PADDLE_THROW(platform::errors::InvalidArgument( "Expected dtype of ShapeTensor must be int32, int64. But got " "unsupport dtype: %s.", - paddle::framework::DataTypeToString(new_data_tensor->type()))); + new_data_tensor->dtype())); } } @@ -70,7 +72,8 @@ inline std::vector GetNewDataFromShapeTensorList( "But received tensor's dim=%s.", tensor->dims())); - if (tensor->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT32) { if (platform::is_gpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -78,7 +81,8 @@ inline std::vector GetNewDataFromShapeTensorList( } else { vec_new_shape.push_back(static_cast(*tensor->data())); } - } else if (tensor->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT64) { if (platform::is_gpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -91,7 +95,8 @@ inline std::vector GetNewDataFromShapeTensorList( "Expected dtype of ShapeTensorList of %d-th must be int32, int64. 
" "But got " "unsupport dtype: %s.", - i, paddle::framework::DataTypeToString(tensor->type()))); + i, paddle::framework::DataTypeToString( + framework::TransToProtoVarType(tensor->dtype())))); } } diff --git a/paddle/fluid/operators/uniform_random_op_npu.cc b/paddle/fluid/operators/uniform_random_op_npu.cc index 6812a2b0b7085..269cf7cc11eb6 100644 --- a/paddle/fluid/operators/uniform_random_op_npu.cc +++ b/paddle/fluid/operators/uniform_random_op_npu.cc @@ -60,7 +60,7 @@ class NPUUniformRandomKernel : public framework::OpKernel { tensor->mutable_data(ctx.GetPlace()); int64_t size = tensor->numel(); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T *data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index c3d291d1201c6..8c3e33d44dcc5 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -77,7 +77,7 @@ struct UniqueOpFunctor { // init count_data to 0 memset(count_data, 0, uniq.size() * sizeof(IndexT)); - const auto& index_type = index_->type(); + const auto& index_type = framework::TransToProtoVarType(index_->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index e2cbf73aa1316..82b1aab7bfb7f 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -132,8 +132,10 @@ class UnsqueezeOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType( + ctx.Input("X")->type()), + ctx.device_context()); } framework::OpKernelType GetKernelTypeForVar( diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index a413c4a331b65..78506c12bc6d7 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -23,7 +23,8 @@ namespace operators { template inline std::vector GetDataFromTensor(const framework::Tensor* x) { std::vector vec_new_data; - if (x->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT32) { auto* data = x->data(); framework::Tensor cpu_attr_tensor; if (!platform::is_cpu_place(x->place())) { @@ -32,7 +33,8 @@ inline std::vector GetDataFromTensor(const framework::Tensor* x) { data = cpu_attr_tensor.data(); } vec_new_data = std::vector(data, data + x->numel()); - } else if (x->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { auto* data = x->data(); framework::Tensor cpu_attr_tensor; if (!platform::is_cpu_place(x->place())) { @@ -45,7 +47,7 @@ inline std::vector GetDataFromTensor(const framework::Tensor* x) { } else { PADDLE_THROW(platform::errors::InvalidArgument( "The dtype of Tensor must be int32 or int64, but received: %s", - x->type())); + framework::TransToProtoVarType(x->dtype()))); } return vec_new_data; } @@ -63,7 +65,8 @@ inline std::vector GetDataFromTensorList( "is [%s]", tensor->dims())); - if (tensor->type() == framework::proto::VarType::INT32) { + if 
(framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT32) { if (!platform::is_cpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -71,7 +74,8 @@ inline std::vector GetDataFromTensorList( } else { vec_new_data.push_back(static_cast(*tensor->data())); } - } else if (tensor->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT64) { if (!platform::is_cpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -84,7 +88,7 @@ inline std::vector GetDataFromTensorList( PADDLE_THROW(platform::errors::InvalidArgument( "The dtype of Tensor in list must be int32 or int64, but received: " "%s", - tensor->type())); + tensor->dtype())); } } return vec_new_data; diff --git a/paddle/fluid/operators/where_index_op_npu.cc b/paddle/fluid/operators/where_index_op_npu.cc index 226f1461ed439..8c7c4e25d692e 100644 --- a/paddle/fluid/operators/where_index_op_npu.cc +++ b/paddle/fluid/operators/where_index_op_npu.cc @@ -37,7 +37,8 @@ class NPUWhereIndexKernel : public framework::OpKernel { // Run Cast and ReduceSum to get 0 dim of Out Tensor booled_cond; - if (condition->type() != framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(condition->dtype()) != + framework::proto::VarType::BOOL) { auto bool_type = ConvertToNpuDtype(framework::proto::VarType::BOOL); booled_cond.mutable_data(dims, place); const auto& booled_runner = diff --git a/paddle/fluid/operators/where_op_npu.cc b/paddle/fluid/operators/where_op_npu.cc index bb77f35daceb0..d4294393daa34 100755 --- a/paddle/fluid/operators/where_op_npu.cc +++ b/paddle/fluid/operators/where_op_npu.cc @@ -58,7 +58,7 @@ class WhereGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - framework::Tensor tensor_zeros(dout_t->type()); + framework::Tensor tensor_zeros(dout_t->dtype()); tensor_zeros.mutable_data(dout_t->dims(), ctx.GetPlace()); const auto& runner = NpuOpRunner("ZerosLike", {*dout_t}, {tensor_zeros}, {}); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h index a75759e2ae079..9b02d11b3f97f 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h @@ -23,6 +23,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -152,8 +153,9 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor( - desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(), - dims_with_group.data(), strides.data())); + desc_.get(), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), + dims_with_group.size(), dims_with_group.data(), strides.data())); } void set(const std::vector& dims, const cudnnTensorFormat_t format, @@ -171,7 +173,8 @@ class TensorDescriptor { void set(const Tensor& tensor, const cudnnTensorFormat_t format) { auto dims = framework::vectorize(tensor.dims()); - auto dtype = ToCudnnDataType(tensor.type()); + auto dtype = + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())); set(dims, format, dtype); } @@ -217,7 +220,8 @@ class FilterDescriptor { void set(const Tensor& tensor, 
const cudnnTensorFormat_t format, const int groups = 1) { auto dims = framework::vectorize(tensor.dims()); - auto dtype = ToCudnnDataType(tensor.type()); + auto dtype = + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())); set(dims, format, dtype, groups); } diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h index f4533c859fcd3..7acf713d68809 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h @@ -142,7 +142,8 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); @@ -164,7 +165,8 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); @@ -209,7 +211,8 @@ class FilterDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index 8bedca5c0b82f..93c5cc90762ca 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -297,7 +297,9 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { tensor->Resize(ddim); auto const_data = std::unique_ptr(); - popart::TensorInfo tensor_info(VarType2PopartType(tensor->type()), shape); + popart::TensorInfo tensor_info( + VarType2PopartType(framework::TransToProtoVarType(tensor->dtype())), + shape); const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info)); popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data); SetIpuIndexStage(result, op_desc); @@ -325,7 +327,8 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { auto var = scope->FindVar(var_name); if (var) { auto tensor = var->Get(); - auto dtype = VarType2PopartType(tensor.type()); + auto dtype = VarType2PopartType( + framework::TransToProtoVarType(tensor.dtype())); auto shape = std::vector(); for (size_t i = 0; i < tensor.dims().size(); ++i) { shape.push_back(tensor.dims().at(i)); diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index df2f1c786e947..5da785c1b6f4a 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -112,7 +112,8 @@ void Executor::Run(const std::vector &inputs, tensor->Resize(framework::make_ddim(output_shape)); auto fetch_dtype = fetch_info.dataType(); auto paddle_type = 
PopartType2VarType(fetch_dtype); - tensor->mutable_data(ctx.GetPlace(), paddle_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(paddle_type)); anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor)); popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id)); } diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h index 3cd7115b5ebf3..1b8d25acff473 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.h +++ b/paddle/fluid/platform/device/ipu/ipu_utils.h @@ -89,7 +89,8 @@ bool GetBoolEnv(std::string str); template std::unique_ptr> Tensor2IArray(const Tensor& tensor) { - auto dtype = VarType2PopartType(tensor.type()); + auto dtype = + VarType2PopartType(framework::TransToProtoVarType(tensor.dtype())); auto shape = std::vector(); for (size_t i = 0; i < tensor.dims().size(); ++i) { shape.push_back(tensor.dims().at(i)); diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.cc b/paddle/fluid/platform/device/npu/npu_op_runner.cc index b9c92e07612b0..14bad6f7479c3 100644 --- a/paddle/fluid/platform/device/npu/npu_op_runner.cc +++ b/paddle/fluid/platform/device/npu/npu_op_runner.cc @@ -373,7 +373,8 @@ std::vector &NpuOpRunner::GetOutputBuffers() { aclTensorDesc *NpuOpRunner::CreateTensorDesc(Tensor tensor, aclMemType mem_type) { - auto dtype = ConvertToNpuDtype(tensor.type()); + auto dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(tensor.dtype())); auto format = ConvertToNpuFormat(tensor.layout()); auto dims = framework::vectorize(tensor.dims()); int size = dims.size(); @@ -459,12 +460,14 @@ void NpuOpRunner::TypeAdapter( for (size_t i = 0; i < input_type.size(); ++i) { bool cast_input = - (input_type[i] == -1 || input_type[i] != inputs[i].type()); + (input_type[i] == -1 || + input_type[i] != framework::TransToProtoVarType(inputs[i].dtype())); if (!cast_input) { tmp_inputs[i].ShareDataWith(inputs[i]); } else { tmp_inputs[i].Resize(inputs[i].dims()); - tmp_inputs[i].mutable_data(dev_ctx.GetPlace(), input_type[i]); + tmp_inputs[i].mutable_data(dev_ctx.GetPlace(), + framework::TransToPtenDataType(input_type[i])); const auto &cast_runner = NpuOpRunner( "Cast", {inputs[i]}, {tmp_inputs[i]}, @@ -474,12 +477,14 @@ void NpuOpRunner::TypeAdapter( } for (size_t i = 0; i < output_type.size(); ++i) { bool cast_output = - (output_type[i] == -1 || output_type[i] != outputs[i].type()); + (output_type[i] == -1 || + output_type[i] != framework::TransToProtoVarType(outputs[i].dtype())); if (!cast_output) { tmp_outputs[i].ShareDataWith(outputs[i]); } else { tmp_outputs[i].Resize(outputs[i].dims()); - tmp_outputs[i].mutable_data(dev_ctx.GetPlace(), output_type[i]); + tmp_outputs[i].mutable_data( + dev_ctx.GetPlace(), framework::TransToPtenDataType(output_type[i])); } } @@ -487,12 +492,14 @@ void NpuOpRunner::TypeAdapter( for (size_t i = 0; i < output_type.size(); ++i) { bool cast_output = - (output_type[i] == -1 || output_type[i] != outputs[i].type()); + (output_type[i] == -1 || + output_type[i] != framework::TransToProtoVarType(outputs[i].dtype())); if (cast_output) { const auto &cast_runner = NpuOpRunner( "Cast", {tmp_outputs[i]}, {outputs[i]}, {{"dst_type", - static_cast(ConvertToNpuDtype(outputs[i].type()))}}); + static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(outputs[i].dtype())))}}); cast_runner.Run(dev_ctx.stream()); } } diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.h b/paddle/fluid/platform/device/npu/npu_op_runner.h index 39d2b9ffa9b1d..7583e00343575 100644 --- 
a/paddle/fluid/platform/device/npu/npu_op_runner.h +++ b/paddle/fluid/platform/device/npu/npu_op_runner.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include "acl/acl.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device/npu/enforce_npu.h" @@ -143,7 +144,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { int numel = tensor->numel(); if (numel == 1) { - Tensor npu_pinned_tensor(tensor->type()); + Tensor npu_pinned_tensor(tensor->dtype()); platform::NPUPinnedPlace npu_pinned_place; auto npu_pinned_ptr = npu_pinned_tensor.mutable_data({1}, npu_pinned_place); diff --git a/paddle/fluid/platform/lodtensor_printer.cc b/paddle/fluid/platform/lodtensor_printer.cc index ca5d156802851..159e7354279ad 100644 --- a/paddle/fluid/platform/lodtensor_printer.cc +++ b/paddle/fluid/platform/lodtensor_printer.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/platform/lodtensor_printer.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" namespace pten { @@ -50,29 +51,29 @@ void PrintVar(framework::Scope* scope, const std::string& var_name, *sstream << print_info; -#define PrintTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor->type() == proto_type) { \ - *sstream << "["; \ - const cpp_type* data = nullptr; \ - framework::LoDTensor cpu_tensor; \ - if (is_cpu_place(tensor->place())) { \ - data = tensor->data(); \ - } else { \ - platform::CPUPlace cpu_place; \ - paddle::framework::TensorCopy(*tensor, cpu_place, &cpu_tensor); \ - data = cpu_tensor.data(); \ - } \ - auto element_num = tensor->numel(); \ - *sstream << element_num << "]:["; \ - if (element_num > 0) { \ - *sstream << data[0]; \ - for (int j = 1; j < element_num; ++j) { \ - *sstream << " " << data[j]; \ - } \ - } \ - *sstream << "]"; \ - } \ +#define PrintTensorCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(tensor->dtype()) == proto_type) { \ + *sstream << "["; \ + const cpp_type* data = nullptr; \ + framework::LoDTensor cpu_tensor; \ + if (is_cpu_place(tensor->place())) { \ + data = tensor->data(); \ + } else { \ + platform::CPUPlace cpu_place; \ + paddle::framework::TensorCopy(*tensor, cpu_place, &cpu_tensor); \ + data = cpu_tensor.data(); \ + } \ + auto element_num = tensor->numel(); \ + *sstream << element_num << "]:["; \ + if (element_num > 0) { \ + *sstream << data[0]; \ + for (int j = 1; j < element_num; ++j) { \ + *sstream << " " << data[j]; \ + } \ + } \ + *sstream << "]"; \ + } \ } while (0) _ForEachDataType_(PrintTensorCallback); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 69fe068a9b4a8..8d706263f029c 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1050,7 +1050,8 @@ class ReorderMKLDNNHandler { const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt); - auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size()); + auto dst_data = output->mutable_data( + place, framework::TransToPtenDataType(vtype_dst_), dst_md.get_size()); return std::make_shared(dst_md, engine_, dst_data); } @@ -1058,7 +1059,8 @@ class ReorderMKLDNNHandler { framework::Tensor* output, const std::vector& dims, const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt); - auto dst_data = output->mutable_data(place, 
vtype_dst_, dst_md.get_size()); + auto dst_data = output->mutable_data( + place, framework::TransToPtenDataType(vtype_dst_), dst_md.get_size()); return std::make_shared(dst_md, engine_, dst_data); } diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 1db7339664ace..f4e5df800dada 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -79,7 +80,8 @@ void EmptyEagerTensorInitializer( std::shared_ptr dense_tensor = std::make_shared( pten::make_intrusive(place), - pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims)); + pten::DenseTensorMeta(paddle::framework::TransToPtenDataType(dtype), + ddims)); dense_tensor->mutable_data(place); self->tensor.set_impl(dense_tensor); } else { diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 7aa810acc28cd..a32edae2ad23c 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -59,7 +60,7 @@ class EagerNumpyAllocation : public pten::Allocation { explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype) : Allocation( static_cast(pybind11::detail::array_proxy(numpy_data)->data), - pten::DataTypeSize(dtype) * PyArray_Size_(numpy_data), + framework::DataTypeSize(dtype) * PyArray_Size_(numpy_data), paddle::platform::CPUPlace()), arr_(numpy_data) { PADDLE_ENFORCE_NOT_NULL(arr_, platform::errors::InvalidArgument( diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 511cc10d645ea..6865379036608 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -51,7 +52,7 @@ static PyObject* eager_tensor_method_numpy(TensorObject* self, PyObject* args, self->tensor.name())); auto tensor_dims = self->tensor.shape(); auto numpy_dtype = TensorDtype2NumpyDtype(self->tensor.type()); - auto sizeof_dtype = pten::DataTypeSize(self->tensor.type()); + auto sizeof_dtype = paddle::framework::DataTypeSize(self->tensor.type()); Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank]; Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank]; size_t numel = 1; @@ -83,7 +84,8 @@ static PyObject* eager_tensor_method_numpy(TensorObject* self, PyObject* args, paddle::platform::GpuMemcpySync( pybind11::detail::array_proxy(array)->data, dense_tensor->data(), - pten::DataTypeSize(dense_tensor->dtype()) * dense_tensor->numel(), + paddle::framework::DataTypeSize(dense_tensor->dtype()) * + dense_tensor->numel(), cudaMemcpyDeviceToHost); #endif } else { diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 9fb0941fa361b..942df3f69dac0 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -172,7 +172,8 @@ PyObject* eager_tensor_properties_get_place_str(TensorObject* self, PyObject* eager_tensor_properties_get_dtype(TensorObject* self, void* closure) { EAGER_SYNC_TRY - return ToPyObject(pten::TransToProtoVarType(self->tensor.type())); + return ToPyObject( + paddle::framework::TransToProtoVarType(self->tensor.type())); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 61b8f1fe010d9..c84a71d8aaa00 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -30,6 +30,7 @@ limitations under the License. */ #include #include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope_guard.h" #include "paddle/fluid/imperative/all_reduce.h" #include "paddle/fluid/imperative/amp_auto_cast.h" @@ -187,7 +188,7 @@ static void InitVarBaseAndTensor( "Place should be one of " "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/MLUPlace")); } - self->SetDataType(tensor->type()); + self->SetDataType(framework::TransToProtoVarType(tensor->dtype())); } static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, @@ -242,7 +243,7 @@ static void InitVarBaseFromNumpyWithArg(imperative::VarBase *self, } SetTensorFromPyArray
(tensor, array, place, zero_copy); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor->type()); + self->SetDataType(framework::TransToProtoVarType(tensor->dtype())); } static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, @@ -264,7 +265,7 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor.type()); + self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); auto *new_tensor = self->MutableVar()->GetMutable(); // Same place,share data directly if (place == tensor.place()) { @@ -289,7 +290,7 @@ static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor.type()); + self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); auto *new_tensor = self->MutableVar()->GetMutable(); // Same place,share data directly if (platform::is_same_place(place, tensor.place())) { @@ -433,9 +434,11 @@ static Py_ssize_t GetSliceIndexFromTensor( const std::shared_ptr &tensor_index) { const auto &tensor = tensor_index->Var().Get(); if (tensor.numel() == 1) { - if (tensor.type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(tensor.dtype()) == + framework::proto::VarType::INT32) { return static_cast(operators::GetValue(&tensor)); - } else if (tensor.type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor.dtype()) == + framework::proto::VarType::INT64) { return static_cast(operators::GetValue(&tensor)); } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -787,7 +790,7 @@ void BindImperative(py::module *m_ptr) { platform::CPUPlace(), true); // 3. allocate shared memory void *data_ptr = t.data(); - size_t data_size = t.numel() * framework::SizeOfType(t.type()); + size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation(data_size); // 4. maintain mmap fd set & backup ipc_name @@ -822,7 +825,7 @@ void BindImperative(py::module *m_ptr) { platform::CPUPlace(), true); // 3. allocate shared memory void *data_ptr = t.data(); - size_t data_size = t.numel() * framework::SizeOfType(t.type()); + size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation(data_size); // 4. maintain mmap fd set & backup ipc_name @@ -1314,7 +1317,7 @@ void BindImperative(py::module *m_ptr) { } } #define TENSOR_TO_PY_SCALAR(T, proto_type) \ - if (tensor.type() == proto_type) { \ + if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \ std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(proto_type); \ T b = TensorGetElement(tensor, offset); \ return py::array(py::dtype(py_dtype_str.c_str()), {}, {}, \ @@ -1324,8 +1327,7 @@ void BindImperative(py::module *m_ptr) { _ForEachDataType_(TENSOR_TO_PY_SCALAR); #undef TENSOR_TO_PY_SCALAR PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported tensor data type: %s", - framework::DataTypeToString(tensor.type()))); + "Unsupported tensor data type: %s", tensor.dtype())); }, py::return_value_policy::copy) .def("_inplace_version", @@ -1852,7 +1854,9 @@ void BindImperative(py::module *m_ptr) { auto *t = self->MutableVar()->GetMutable(); // 2. 
allocate shared memory void *data_ptr = t->data(); - size_t data_size = t->numel() * framework::SizeOfType(t->type()); + size_t data_size = + t->numel() * framework::SizeOfType( + framework::TransToProtoVarType(t->dtype())); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation( data_size); @@ -1887,7 +1891,9 @@ void BindImperative(py::module *m_ptr) { // Register the cpu memory as the cuda host memory const auto &data_numel = self_tensor->numel(); const size_t &need_allocate_size = - data_numel * framework::SizeOfType(self_tensor->type()); + data_numel * + framework::SizeOfType( + framework::TransToProtoVarType(self_tensor->dtype())); void *data_ptr = self_tensor->data(); auto result = cudaHostRegister(data_ptr, need_allocate_size, cudaHostRegisterDefault); @@ -1907,7 +1913,7 @@ void BindImperative(py::module *m_ptr) { std::make_shared( cuda_device_pointer, need_allocate_size, platform::CUDAPlace(device_id)); - self_tensor->ResetHolderWithType(holder, self_tensor->type()); + self_tensor->ResetHolderWithType(holder, self_tensor->dtype()); }, py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC( Returns self tensor with the UVA(unified virtual addressing). diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 8f20daf337d2c..959e34afe3da6 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -28,6 +28,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type_transform.h" @@ -458,7 +459,9 @@ static void inline CreateVariableIfNotExit( var = const_cast(&scope)->Var(para_name); auto *tensor_temp = var->GetMutable(); tensor_temp->Resize(framework::make_ddim(var_desc.GetShape())); - tensor_temp->mutable_data(exe->GetPlace(), var_desc.GetDataType()); + tensor_temp->mutable_data( + exe->GetPlace(), + framework::TransToPtenDataType(var_desc.GetDataType())); } } } else { @@ -817,33 +820,39 @@ PYBIND11_MODULE(core_noavx, m) { .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::XPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CUDAPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CUDAPinnedPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::MLUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, 
framework::TransToPtenDataType(type))); }) .def("_clear", &framework::Tensor::clear) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::NPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_copy_from", &TensorCopyFrom, py::arg("tensor"), py::arg("place"), py::arg("batch_size") = -1) @@ -941,7 +950,10 @@ PYBIND11_MODULE(core_noavx, m) { .def("_set_double_element", TensorSetElement) .def("_get_double_element", TensorGetElement) .def("_place", [](framework::Tensor &self) { return self.place(); }) - .def("_dtype", [](framework::Tensor &self) { return self.type(); }) + .def("_dtype", + [](framework::Tensor &self) { + return framework::TransToProtoVarType(self.type()); + }) .def("_layout", [](framework::Tensor &self) { return DataLayoutToString(self.layout()); @@ -1207,7 +1219,7 @@ PYBIND11_MODULE(core_noavx, m) { // 4. Rebuild Tensor tensor.ResetHolderWithType( shared_reader_holder, - static_cast(t[2].cast())); + static_cast(t[2].cast())); tensor.Resize(make_ddim(t[3].cast>())); tensor.set_lod(t[4].cast()); diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index d4fa1b2c89abf..1e6a30a3ba783 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -353,7 +353,8 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { auto new_var = std::make_shared(act_name); new_var->SetPersistable(false); new_var->SetType(framework::proto::VarType::LOD_TENSOR); - new_var->SetDataType(lod_tensor.type()); + new_var->SetDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); auto *tensor = new_var->MutableVar()->GetMutable(); *tensor = std::move(lod_tensor); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index dac84abfb9857..9a11c5946f318 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -31,6 +31,7 @@ limitations under the License. 
*/ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" @@ -308,7 +309,7 @@ void SetTensorFromPyArrayT( if (zero_copy) { auto holder = std::make_shared>(array); auto type = framework::ToDataType(std::type_index(typeid(T))); - self->ResetHolderWithType(holder, type); + self->ResetHolderWithType(holder, framework::TransToPtenDataType(type)); } else { auto dst = self->mutable_data(place); std::memcpy(dst, array.data(), array.nbytes()); @@ -332,7 +333,7 @@ void SetTensorFromPyArrayT( if (zero_copy) { auto holder = std::make_shared>(array); auto type = framework::ToDataType(std::type_index(typeid(T))); - self->ResetHolderWithType(holder, type); + self->ResetHolderWithType(holder, framework::TransToPtenDataType(type)); } else { auto dst = self->mutable_data(place); std::memcpy(dst, array.data(), array.nbytes()); @@ -477,7 +478,8 @@ void SetUVATensorFromPyArray( std::make_shared( cuda_device_pointer, need_allocate_size, platform::CUDAPlace(device_id)); - self_tensor->ResetHolderWithType(holder, data_type); + self_tensor->ResetHolderWithType(holder, + framework::TransToPtenDataType(data_type)); #endif } @@ -577,21 +579,21 @@ inline framework::Tensor *_getTensor(const framework::Tensor &self, output->Resize(ddim); auto place = self.place(); if (platform::is_cpu_place(place)) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } else if (platform::is_xpu_place(place)) { #ifdef PADDLE_WITH_XPU - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); #endif } else if (platform::is_mlu_place(place)) { #ifdef PADDLE_WITH_MLU - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); #endif } else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (platform::is_cuda_pinned_place(place)) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } else if ((platform::is_gpu_place(place))) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } #endif } @@ -677,7 +679,7 @@ inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self, inline framework::Tensor *_sliceTensor(const framework::Tensor &self, py::object obj, int dim) { - auto src_type = self.type(); + auto src_type = framework::TransToProtoVarType(self.dtype()); switch (src_type) { case framework::proto::VarType::FP16: return _sliceAndConcat(self, obj, dim); @@ -756,7 +758,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, bool is_npu_tensor = platform::is_npu_place(tensor.place()); bool is_mlu_tensor = platform::is_mlu_place(tensor.place()); const auto &tensor_dims = tensor.dims(); - auto tensor_dtype = tensor.type(); + auto tensor_dtype = framework::TransToProtoVarType(tensor.dtype()); size_t sizeof_dtype = framework::SizeOfType(tensor_dtype); std::vector py_dims(tensor_dims.size()); @@ -771,7 +773,8 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, const void *tensor_buf_ptr = tensor.data(); - std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(tensor.type()); + std::string py_dtype_str = details::TensorDTypeToPyDTypeStr( + framework::TransToProtoVarType(tensor.dtype())); if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor && !is_mlu_tensor) { if 
(!need_deep_copy) { diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 230787c1b35cd..ea0e9d8cd3e0a 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -91,11 +91,11 @@ pten::ScalarArray MakePtenScalarArrayFromVarList( vector_data.reserve(variable_list.size()); for (auto* var : variable_list) { - paddle::framework::proto::VarType::Type data_type; + paddle::experimental::DataType data_type; if (var->IsType()) { const auto& tensor = var->Get(); - data_type = tensor.type(); - if (data_type == paddle::framework::proto::VarType::INT64) { + data_type = tensor.dtype(); + if (data_type == paddle::experimental::DataType::INT64) { const auto& tensor = var->Get(); if (tensor.IsInitialized() && !platform::is_same_place(tensor.place(), expected_place)) { @@ -105,7 +105,7 @@ pten::ScalarArray MakePtenScalarArrayFromVarList( } else { vector_data.push_back(*tensor.data()); } - } else if (data_type == paddle::framework::proto::VarType::INT32) { + } else if (data_type == paddle::experimental::DataType::INT32) { const auto& tensor = var->Get(); if (tensor.IsInitialized() && !platform::is_same_place(tensor.place(), expected_place)) { diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index 46cd2a96414de..6beaf5797d692 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -8,7 +8,7 @@ if(WITH_GPU) endif() cc_library(pten_enforce INTERFACE SRCS enforce.cc DEPS ${pten_enforce_deps}) -cc_library(kernel_factory SRCS kernel_factory.cc DEPS pten_enforce convert_utils) +cc_library(kernel_factory SRCS kernel_factory.cc DEPS pten_enforce fluid_convert_utils) cc_library(kernel_context SRCS kernel_context.cc DEPS pten_enforce pten_context) cc_library(ddim SRCS ddim.cc DEPS pten_enforce) @@ -17,7 +17,7 @@ cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce mixed_vector) cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce mixed_vector) cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base) -cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS convert_utils tensor_meta tensor_base) +cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) diff --git a/paddle/pten/core/compat/convert_utils.cc b/paddle/pten/core/compat/convert_utils.cc index 191f2c6c4b50a..2cd286677462d 100644 --- a/paddle/pten/core/compat/convert_utils.cc +++ b/paddle/pten/core/compat/convert_utils.cc @@ -21,77 +21,6 @@ limitations under the License. 
*/ namespace pten { -paddle::experimental::DataType TransToPtenDataType( - const paddle::framework::proto::VarType::Type& dtype) { - // Set the order of case branches according to the frequency with - // the data type is used - switch (dtype) { - case paddle::framework::proto::VarType::FP32: - return DataType::FLOAT32; - case paddle::framework::proto::VarType::FP64: - return DataType::FLOAT64; - case paddle::framework::proto::VarType::INT64: - return DataType::INT64; - case paddle::framework::proto::VarType::INT32: - return DataType::INT32; - case paddle::framework::proto::VarType::INT8: - return DataType::INT8; - case paddle::framework::proto::VarType::UINT8: - return DataType::UINT8; - case paddle::framework::proto::VarType::INT16: - return DataType::INT16; - case paddle::framework::proto::VarType::COMPLEX64: - return DataType::COMPLEX64; - case paddle::framework::proto::VarType::COMPLEX128: - return DataType::COMPLEX128; - case paddle::framework::proto::VarType::FP16: - return DataType::FLOAT16; - case paddle::framework::proto::VarType::BF16: - return DataType::BFLOAT16; - case paddle::framework::proto::VarType::BOOL: - return DataType::BOOL; - default: - return DataType::UNDEFINED; - } -} - -paddle::framework::proto::VarType::Type TransToProtoVarType( - const paddle::experimental::DataType& dtype) { - // Set the order of case branches according to the frequency with - // the data type is used - switch (dtype) { - case DataType::FLOAT32: - return paddle::framework::proto::VarType::FP32; - case DataType::FLOAT64: - return paddle::framework::proto::VarType::FP64; - case DataType::INT64: - return paddle::framework::proto::VarType::INT64; - case DataType::INT32: - return paddle::framework::proto::VarType::INT32; - case DataType::INT8: - return paddle::framework::proto::VarType::INT8; - case DataType::UINT8: - return paddle::framework::proto::VarType::UINT8; - case DataType::INT16: - return paddle::framework::proto::VarType::INT16; - case DataType::COMPLEX64: - return paddle::framework::proto::VarType::COMPLEX64; - case DataType::COMPLEX128: - return paddle::framework::proto::VarType::COMPLEX128; - case DataType::FLOAT16: - return paddle::framework::proto::VarType::FP16; - case DataType::BFLOAT16: - return paddle::framework::proto::VarType::BF16; - case DataType::BOOL: - return paddle::framework::proto::VarType::BOOL; - default: - PADDLE_THROW(pten::errors::Unimplemented( - "Unsupported data type `%s` when casting it into " - "paddle data type.", - dtype)); - } -} - Backend TransToPtenBackend(const pten::Place& place) { if (place.GetType() == pten::AllocationType::CPU) { return Backend::CPU; @@ -135,97 +64,6 @@ pten::Place TransToPtenPlace(const Backend& backend, bool set_device_id) { } } -size_t DataTypeSize(DataType dtype) { - switch (dtype) { - case DataType::UNDEFINED: - return 0; - case DataType::BOOL: - return sizeof(bool); - case DataType::INT8: - return sizeof(int8_t); - case DataType::UINT8: - return sizeof(uint8_t); - case DataType::INT16: - return sizeof(int16_t); - case DataType::INT32: - return sizeof(int); - case DataType::INT64: - return sizeof(int64_t); - case DataType::FLOAT16: - return sizeof(pten::dtype::float16); - case DataType::FLOAT32: - return sizeof(float); - case DataType::FLOAT64: - return sizeof(double); - case DataType::COMPLEX64: - return sizeof(pten::dtype::complex); - case DataType::COMPLEX128: - return sizeof(pten::dtype::complex); - default: - return 0; - } -} - -DataType String2DataType(const std::string& str) { - if (str == "bool") { - return 
DataType::BOOL; - } else if (str == "float16") { - return DataType::FLOAT16; - } else if (str == "float32") { - return DataType::FLOAT32; - } else if (str == "float64") { - return DataType::FLOAT64; - } else if (str == "int8") { - return DataType::INT8; - } else if (str == "int16") { - return DataType::INT16; - } else if (str == "int32") { - return DataType::INT32; - } else if (str == "int64") { - return DataType::INT64; - } else if (str == "uint8") { - return DataType::UINT8; - } else if (str == "complex64") { - return DataType::COMPLEX64; - } else if (str == "complex128") { - return DataType::COMPLEX128; - } else { - return DataType::UNDEFINED; - } -} - -std::string DataType2String(DataType dtype) { - switch (dtype) { - case DataType::BOOL: - return "bool"; - case DataType::INT8: - return "int8"; - case DataType::UINT8: - return "uint8"; - case DataType::INT16: - return "int16"; - case DataType::INT32: - return "int32"; - case DataType::INT64: - return "int64"; - case DataType::FLOAT16: - return "float16"; - case DataType::FLOAT32: - return "float32"; - case DataType::FLOAT64: - return "float64"; - case DataType::COMPLEX64: - return "complex64"; - case DataType::COMPLEX128: - return "complex128"; - default: - PADDLE_THROW(pten::errors::InvalidArgument( - "Unknow pten::DataType, the int value = %d.", - static_cast(dtype))); - return ""; - } -} - std::string TransToPtenKernelName(const std::string& fluid_op_name) { return OpUtilsMap::Instance().GetBaseKernelName(fluid_op_name); } diff --git a/paddle/pten/core/compat/convert_utils.h b/paddle/pten/core/compat/convert_utils.h index 3e6672e8c941c..0db71b577de51 100644 --- a/paddle/pten/core/compat/convert_utils.h +++ b/paddle/pten/core/compat/convert_utils.h @@ -27,20 +27,10 @@ limitations under the License. */ namespace pten { -// dtype convert function still relay on fluid proto -DataType TransToPtenDataType( - const paddle::framework::proto::VarType::Type& dtype); -paddle::framework::proto::VarType::Type TransToProtoVarType( - const DataType& dtype); - std::string TransToPtenKernelName(const std::string& fluid_op_name); const std::string& TransToFluidOpName(const std::string& pten_kernel_name); Backend TransToPtenBackend(const pten::Place& place); pten::Place TransToPtenPlace(const Backend& backend, bool set_device_id = true); -size_t DataTypeSize(DataType dtype); -DataType String2DataType(const std::string& str); -std::string DataType2String(DataType dtype); - } // namespace pten diff --git a/paddle/pten/core/dense_tensor.inl b/paddle/pten/core/dense_tensor.inl index 4df6b2556a8af..735783a844d2e 100644 --- a/paddle/pten/core/dense_tensor.inl +++ b/paddle/pten/core/dense_tensor.inl @@ -37,7 +37,7 @@ private: /* @jim19930609: Remove dependency on protobuf after Tensor Unification. */ -explicit DenseTensor(paddle::framework::proto::VarType::Type dtype); +explicit DenseTensor(paddle::experimental::DataType dtype); /// \brief Use existing storage space to create dense tensor. This interface /// can be used to deliberately create an uninitialized dense tensor. 
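For illustration only (not part of the patch; the helper, tensor, and place names below are hypothetical), the dense_tensor.inl change above means fluid-side code that still holds a proto::VarType::Type now converts it at the boundary before touching DenseTensor, assuming the convert_utils.h helpers introduced by this PR:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper: allocate storage for a legacy proto type by converting
// it to the pten DataType that DenseTensor::mutable_data() now expects.
void AllocateForProtoType(pten::DenseTensor* tensor,
                          const paddle::platform::Place& place,
                          paddle::framework::proto::VarType::Type proto_type) {
  tensor->mutable_data(place,
                       paddle::framework::TransToPtenDataType(proto_type));
}
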
@@ -63,23 +63,19 @@ T* mutable_data(const DDim& dims, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, const pten::Stream& stream); /* @jim19930609: Remove dependency on protobuf after Tensor Unification. */ -paddle::framework::proto::VarType::Type type() const; - -/* @jim19930609: Remove dependency on protobuf after Tensor Unification. -*/ -paddle::framework::proto::VarType::Type saved_type() const; +paddle::experimental::DataType type() const; // memory size returns the holding memory size in byte. size_t memory_size() const; @@ -115,9 +111,9 @@ std::shared_ptr MoveMemoryHolder() { void ResetHolder(const std::shared_ptr& holder); void ResetHolderWithType(const std::shared_ptr& holder, - paddle::framework::proto::VarType::Type type); + paddle::experimental::DataType type); -void set_type(paddle::framework::proto::VarType::Type type); +void set_type(paddle::experimental::DataType type); InplaceVersion& InplaceVersionCounter() { return *inplace_version_counter_; diff --git a/paddle/pten/core/dense_tensor_impl.cc b/paddle/pten/core/dense_tensor_impl.cc index 80c66c057158b..dfde62618d01c 100644 --- a/paddle/pten/core/dense_tensor_impl.cc +++ b/paddle/pten/core/dense_tensor_impl.cc @@ -30,8 +30,8 @@ DenseTensor::DenseTensor() { meta_.offset = 0; } -DenseTensor::DenseTensor(paddle::framework::proto::VarType::Type dtype) { - meta_.dtype = TransToPtenDataType(dtype); +DenseTensor::DenseTensor(paddle::experimental::DataType dtype) { + meta_.dtype = dtype; meta_.offset = 0; } @@ -64,13 +64,7 @@ const paddle::platform::Place& DenseTensor::place() const { return holder_->place(); } -paddle::framework::proto::VarType::Type DenseTensor::type() const { - return TransToProtoVarType(meta_.dtype); -} - -paddle::framework::proto::VarType::Type DenseTensor::saved_type() const { - return TransToProtoVarType(meta_.dtype); -} +paddle::experimental::DataType DenseTensor::type() const { return meta_.dtype; } void DenseTensor::set_layout(const paddle::framework::DataLayout layout) { meta_.layout = layout; @@ -96,17 +90,17 @@ void DenseTensor::ResetHolder(const std::shared_ptr& holder) { void DenseTensor::ResetHolderWithType( const std::shared_ptr& holder, - paddle::framework::proto::VarType::Type type) { + paddle::experimental::DataType type) { set_type(type); ResetHolder(holder); } -void DenseTensor::set_type(paddle::framework::proto::VarType::Type type) { - meta_.dtype = TransToPtenDataType(type); +void DenseTensor::set_type(paddle::experimental::DataType type) { + meta_.dtype = type; } void* DenseTensor::mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, size_t requested_size) { set_type(type); PADDLE_ENFORCE_GE( @@ -139,7 +133,7 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place, } void* DenseTensor::mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, const pten::Stream& stream) { set_type(type); PADDLE_ENFORCE_GE( @@ -183,8 +177,10 @@ template inline T* DenseTensor::mutable_data(const paddle::platform::Place& place, size_t requested_size) { 
static_assert(std::is_pod::value, "T must be POD"); - return reinterpret_cast(mutable_data( - place, paddle::framework::DataTypeTrait::DataType(), requested_size)); + return reinterpret_cast( + mutable_data(place, + paddle::experimental::CppTypeToDataType::Type(), + requested_size)); } void DenseTensor::ShareBufferWith(const DenseTensor& tensor) { diff --git a/paddle/pten/kernels/cpu/copy_kernel.cc b/paddle/pten/kernels/cpu/copy_kernel.cc index 6a5d33b91aa76..8df53c064a127 100644 --- a/paddle/pten/kernels/cpu/copy_kernel.cc +++ b/paddle/pten/kernels/cpu/copy_kernel.cc @@ -47,8 +47,7 @@ void Copy(const Context& dev_ctx, VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_cpu_place(src_place)) { paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size); diff --git a/paddle/pten/kernels/funcs/math_function.cc b/paddle/pten/kernels/funcs/math_function.cc index 550ec23c18f3a..780068e0381aa 100644 --- a/paddle/pten/kernels/funcs/math_function.cc +++ b/paddle/pten/kernels/funcs/math_function.cc @@ -220,8 +220,9 @@ void set_constant_with_place( const paddle::platform::DeviceContext& context, paddle::framework::Tensor* tensor, float value) { - paddle::framework::VisitDataType(tensor->type(), - TensorSetConstantCPU(tensor, value)); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantCPU(tensor, value)); } template <> @@ -238,8 +239,9 @@ void set_constant_with_place( const paddle::platform::DeviceContext& context, paddle::framework::Tensor* tensor, float value) { - paddle::framework::VisitDataType(tensor->type(), - TensorSetConstantCPU(tensor, value)); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantCPU(tensor, value)); } struct TensorSetConstantWithPlace : public boost::static_visitor { diff --git a/paddle/pten/kernels/funcs/math_function.cu b/paddle/pten/kernels/funcs/math_function.cu index 76bc5f806d3e8..f7cee12b2dfd4 100644 --- a/paddle/pten/kernels/funcs/math_function.cu +++ b/paddle/pten/kernels/funcs/math_function.cu @@ -227,7 +227,8 @@ void set_constant_with_place( paddle::framework::Tensor* tensor, float value) { paddle::framework::VisitDataType( - tensor->type(), TensorSetConstantGPU(context, tensor, value)); + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantGPU(context, tensor, value)); } template diff --git a/paddle/pten/kernels/funcs/math_function.h b/paddle/pten/kernels/funcs/math_function.h index 8208c0afb0675..73b9dd00bc640 100644 --- a/paddle/pten/kernels/funcs/math_function.h +++ b/paddle/pten/kernels/funcs/math_function.h @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/pten/kernels/funcs/math_function_impl.h b/paddle/pten/kernels/funcs/math_function_impl.h index 286f694ce51a9..19f3082c05cc2 100644 --- a/paddle/pten/kernels/funcs/math_function_impl.h +++ b/paddle/pten/kernels/funcs/math_function_impl.h @@ -31,7 +31,7 @@ void SetConstant::operator()( if (paddle::platform::is_xpu_place(context.GetPlace())) { xpu_place = true; paddle::framework::VisitDataType( - tensor->type(), + paddle::framework::TransToProtoVarType(tensor->type()), TensorSetConstantXPU(tensor, num, context.GetPlace())); } #endif diff --git a/paddle/pten/kernels/gpu/copy_kernel.cu b/paddle/pten/kernels/gpu/copy_kernel.cu index d48a9fb1d774f..5d6229f09f015 100644 --- a/paddle/pten/kernels/gpu/copy_kernel.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -54,8 +54,7 @@ void Copy(const Context& dev_ctx, CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_cuda_pinned_place(src_place) && // NOLINT paddle::platform::is_cuda_pinned_place(dst_place)) { diff --git a/paddle/pten/kernels/xpu/copy_kernel.cc b/paddle/pten/kernels/xpu/copy_kernel.cc index f27705ca112b3..fa0331f24ddb7 100644 --- a/paddle/pten/kernels/xpu/copy_kernel.cc +++ b/paddle/pten/kernels/xpu/copy_kernel.cc @@ -45,8 +45,7 @@ void Copy(const Context& dev_ctx, << dst_place; dst->ResizeAndAllocate(src.dims()); CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_xpu_place(src_place) && // NOLINT paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/tests/core/test_convert_utils.cc b/paddle/pten/tests/core/test_convert_utils.cc index cc7ac6e7e59ca..977e49aafb9bd 100644 --- a/paddle/pten/tests/core/test_convert_utils.cc +++ b/paddle/pten/tests/core/test_convert_utils.cc @@ -13,59 +13,66 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "gtest/gtest.h" -#include "paddle/pten/core/compat/convert_utils.h" +#include "paddle/fluid/framework/convert_utils.h" namespace pten { namespace tests { TEST(ConvertUtils, DataType) { // enum -> proto - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT64) == paddle::framework::proto::VarType::FP64); - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT32) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT32) == paddle::framework::proto::VarType::FP32); - CHECK(pten::TransToProtoVarType(paddle::DataType::UINT8) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::UINT8) == paddle::framework::proto::VarType::UINT8); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT8) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT8) == paddle::framework::proto::VarType::INT8); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT32) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT32) == paddle::framework::proto::VarType::INT32); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT64) == paddle::framework::proto::VarType::INT64); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT16) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT16) == paddle::framework::proto::VarType::INT16); - CHECK(pten::TransToProtoVarType(paddle::DataType::BOOL) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::BOOL) == paddle::framework::proto::VarType::BOOL); - CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::COMPLEX64) == paddle::framework::proto::VarType::COMPLEX64); - CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX128) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::COMPLEX128) == paddle::framework::proto::VarType::COMPLEX128); - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT16) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT16) == paddle::framework::proto::VarType::FP16); // proto -> enum - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP64) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP64) == paddle::DataType::FLOAT64); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP32) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP32) == paddle::DataType::FLOAT32); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT64) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT64) == paddle::DataType::INT64); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT32) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT32) == paddle::DataType::INT32); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT8) == - paddle::DataType::INT8); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::UINT8) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT8) == paddle::DataType::INT8); + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::UINT8) == paddle::DataType::UINT8); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT16) == + 
CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT16) == paddle::DataType::INT16); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::BOOL) == - paddle::DataType::BOOL); - CHECK( - pten::TransToPtenDataType(paddle::framework::proto::VarType::COMPLEX64) == - paddle::DataType::COMPLEX64); - CHECK(pten::TransToPtenDataType( + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::BOOL) == paddle::DataType::BOOL); + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::COMPLEX64) == + paddle::DataType::COMPLEX64); + CHECK(paddle::framework::TransToPtenDataType( paddle::framework::proto::VarType::COMPLEX128) == paddle::DataType::COMPLEX128); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP16) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP16) == paddle::DataType::FLOAT16); } diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h index f919340303dde..9cec48f9c99b5 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device_context.h" @@ -78,7 +79,8 @@ struct ReluFunctor { inline void ReluForward(const paddle::framework::Tensor &x, paddle::framework::Tensor *y) { custom_raw_op::ReluFunctor functor(x, y); - paddle::framework::VisitDataType(x.type(), functor); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(x.dtype()), functor); } } // namespace custom_raw_op
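
To summarize the recurring rewrite applied throughout this patch, here is a minimal sketch (illustrative names only; it assumes the paddle/fluid/framework/convert_utils.h helpers added in this PR) of how call sites move from proto-typed queries to pten dtypes:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

// Hypothetical function showing the two most common replacements in this PR.
void MigrationSketch(const paddle::framework::Tensor& t) {
  // Old: auto proto_type = t.type();  // returned proto::VarType::Type
  // New: keep pten::DataType internally and convert to proto only where a
  //      fluid API (e.g. VisitDataType, ConvertToNpuDtype) still requires it.
  auto proto_type = paddle::framework::TransToProtoVarType(t.dtype());

  // Old: size_t bytes = t.numel() * framework::SizeOfType(t.type());
  // New: the byte size comes straight from the pten dtype.
  size_t bytes = t.numel() * paddle::framework::DataTypeSize(t.dtype());

  (void)proto_type;
  (void)bytes;
}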