From 7e7e9404217fb1f2473f9e41d01dece67c45681f Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Tue, 15 Feb 2022 09:39:01 +0800
Subject: [PATCH] [PTen]Migrate proto::VarType outside of Pten (#39411)

* #1 migrate dist-related type()-> dtype()
* move datatype function from pten -> fluid/framework
* change type() in imperative into convert(dtype())
* modify xx_tensor->type into xx_tensor->dtype
* change the set_type interface and the caller
* modify xx_tensor.type into xx_tensor.dtype
* fix mutable_data(place, dtype())
* change caller of mutable_data in pten and distributed
* change the caller of mutable_data in fluid/framework
* change the caller of mutable_data in imperative directory
* mutable_data: inference
* update the call of mutable_data
* transfer MakePenScalarArray MakePtenScalar ResetHolderWithType
* pass the compile. the next step is remove VarType in Pten
* fix all and remove VarType from pten. success in linux. Next task is other platform
* fix conflict with develop
* fix compiled error
* Fix reset conversion
* fix conflict
* fix compiled problem
* fix typo
* Fix << in tensor_utils.cc
* fix type->dtype
* fix unittest
* fix tensor init constructor
* fix DataTypeSize for BFloat16
* fix code style
* fix npu compiled error
* fix npu
* compile npu successfully
* fix conflict
* fix conflict

Co-authored-by: xiongkun
---
 .../distributed/fleet_executor/dist_model.cc | 2 +- .../distributed/ps/service/brpc_utils.cc | 67 ++++--- .../distributed/ps/service/heter_client.cc | 6 +- paddle/fluid/eager/grad_tensor_holder.cc | 1 + paddle/fluid/framework/CMakeLists.txt | 6 + paddle/fluid/framework/convert_utils.cc | 185 ++++++++++++++++++ paddle/fluid/framework/convert_utils.h | 46 +++++ paddle/fluid/framework/custom_operator.cc | 15 +- .../fluid/framework/data_layout_transform.cc | 21 +- paddle/fluid/framework/data_type_test.cc | 11 +- paddle/fluid/framework/data_type_transform.cc | 17 +- .../framework/details/all_reduce_op_handle.cc | 10 +- .../framework/details/broadcast_op_handle.cc | 9 +- .../details/fetch_async_op_handle.cc | 11 +- .../details/fused_all_reduce_op_handle.cc | 4 +- .../framework/details/gather_op_handle.cc | 2 +- .../framework/details/nan_inf_utils_detail.cc | 24 ++- .../framework/details/nan_inf_utils_detail.cu | 3 +- .../framework/details/reduce_and_gather.h | 3 +- .../framework/details/reduce_op_handle.cc | 17 +- .../details/sparse_all_reduce_op_handle.cc | 5 +- .../framework/details/variable_visitor.cc | 3 +- paddle/fluid/framework/device_worker.cc | 10 +- paddle/fluid/framework/dist_multi_trainer.cc | 10 +- paddle/fluid/framework/dlpack_tensor.cc | 4 +- .../fluid/framework/executor_thread_worker.cc | 15 +- paddle/fluid/framework/fleet/ascend_wrapper.h | 12 +- paddle/fluid/framework/fleet/heter_wrapper.cc | 45 +++-- .../fluid/framework/heter_section_worker.cc | 18 +- paddle/fluid/framework/heterxpu_trainer.cc | 61 +++--- paddle/fluid/framework/hogwild_worker.cc | 12 +- paddle/fluid/framework/infershape_utils.cc | 5 +- .../ir/conv_affine_channel_fuse_pass.cc | 4 +- .../fluid/framework/ir/conv_bn_fuse_pass.cc | 7 +- .../framework/ir/layer_norm_fuse_pass.cc | 12 +- .../conv_bias_mkldnn_fuse_pass_tester.cc | 3 +- .../ir/mkldnn/cpu_quantize_pass_tester.cc | 3 +- .../mkldnn/cpu_quantize_squash_pass_tester.cc | 3 +- paddle/fluid/framework/lod_tensor.cc | 13 +- paddle/fluid/framework/multi_trainer.cc | 10 +- .../framework/new_executor/data_transfer.cc | 5 +- paddle/fluid/framework/operator.cc | 22 ++- paddle/fluid/framework/operator.h | 5 +- .../paddle2cinn/cinn_graph_symbolization.cc
| 3 +- .../cinn_graph_symbolization_test.cc | 5 +- paddle/fluid/framework/parallel_executor.cc | 16 +- paddle/fluid/framework/ps_gpu_trainer.cc | 10 +- paddle/fluid/framework/pten_utils.cc | 5 +- paddle/fluid/framework/save_load_util.cc | 5 +- paddle/fluid/framework/tensor_util.cc | 46 +++-- paddle/fluid/framework/tensor_util.h | 6 +- paddle/fluid/imperative/all_reduce.cc | 10 +- paddle/fluid/imperative/basic_engine.cc | 4 +- paddle/fluid/imperative/bkcl_context.cc | 11 +- paddle/fluid/imperative/gloo_context.cc | 7 +- .../fluid/imperative/gradient_accumulator.cc | 29 ++- paddle/fluid/imperative/hccl_context.cc | 9 +- .../imperative/jit/program_desc_tracer.cc | 3 +- paddle/fluid/imperative/layer.cc | 10 +- paddle/fluid/imperative/nccl_context.cc | 4 +- .../fluid/imperative/partial_grad_engine.cc | 10 +- paddle/fluid/imperative/prepared_operator.h | 3 +- paddle/fluid/imperative/reducer.cc | 5 +- .../tests/test_gradient_accmulator.cc | 7 +- paddle/fluid/imperative/tests/test_group.cc | 4 +- paddle/fluid/imperative/var_helper.cc | 7 +- paddle/fluid/imperative/variable_wrapper.h | 3 +- .../fluid/inference/api/analysis_predictor.cc | 2 +- paddle/fluid/inference/api/api_impl.cc | 2 +- paddle/fluid/inference/api/api_impl_tester.cc | 10 +- .../inference/api/details/zero_copy_tensor.cc | 3 +- paddle/fluid/inference/capi/pd_predictor.cc | 9 +- paddle/fluid/inference/lite/tensor_utils.cc | 38 ++-- .../fluid/inference/tests/api/tester_helper.h | 4 +- paddle/fluid/operators/abs_op.cc | 5 +- paddle/fluid/operators/activation_op_mlu.cc | 25 ++- paddle/fluid/operators/activation_op_npu.cc | 9 +- paddle/fluid/operators/allclose_op.h | 22 ++- .../amp/update_loss_scaling_op_npu.cc | 6 +- paddle/fluid/operators/argsort_op_npu.cc | 13 +- .../fluid/operators/array_to_lod_tensor_op.cc | 9 +- paddle/fluid/operators/batch_norm_op.cc | 12 +- .../fluid/operators/beam_search_decode_op.cc | 3 +- paddle/fluid/operators/bincount_op.cu | 4 +- paddle/fluid/operators/bincount_op.h | 4 +- paddle/fluid/operators/cast_op.cc | 25 ++- paddle/fluid/operators/cast_op.h | 2 +- paddle/fluid/operators/cast_op_npu.cc | 2 +- paddle/fluid/operators/cast_op_xpu.cc | 3 +- .../fluid/operators/class_center_sample_op.cu | 5 +- paddle/fluid/operators/coalesce_tensor_op.cc | 6 +- .../fluid/operators/collective/allreduce_op.h | 3 +- .../operators/collective/alltoall_op.cu.cc | 3 +- .../operators/collective/barrier_op.cu.cc | 3 +- .../operators/collective/broadcast_op.cu.cc | 4 +- .../operators/collective/c_allgather_op.cu.cc | 4 +- .../collective/c_allgather_op_npu.cc | 3 +- .../operators/collective/c_allreduce_op.h | 11 +- .../operators/collective/c_broadcast_op.cu.cc | 3 +- .../collective/c_broadcast_op_npu.cc | 3 +- .../operators/collective/c_concat_op.cu.cc | 3 +- .../operators/collective/c_embedding_op.cu | 5 +- .../operators/collective/c_embedding_op.h | 10 +- .../collective/c_embedding_op_npu.cc | 17 +- .../fluid/operators/collective/c_reduce_op.h | 10 +- .../collective/c_reducescatter_op.cu.cc | 3 +- .../collective/c_reducescatter_op_npu.cc | 3 +- .../operators/collective/c_scatter_op.cu.cc | 3 +- .../c_softmax_with_cross_entropy_op.cu | 19 +- .../collective/global_gather_op.cu.cc | 10 +- .../collective/global_scatter_op.cu.cc | 10 +- .../collective/partial_allgather_op.cu.cc | 3 +- .../collective/partial_allgather_op_npu.cc | 3 +- .../collective/partial_recv_op_npu.cc | 3 +- .../collective/partial_send_op.cu.cc | 4 +- .../collective/partial_send_op_npu.cc | 3 +- .../operators/collective/recv_v2_op_npu.cc | 3 +- 
.../operators/collective/send_v2_op.cu.cc | 6 +- .../operators/collective/send_v2_op_npu.cc | 3 +- paddle/fluid/operators/concat_op.cc | 2 +- paddle/fluid/operators/concat_op_mlu.cc | 8 +- .../controlflow/conditional_block_op.cc | 2 +- .../controlflow/conditional_block_op.h | 3 +- .../controlflow/tensor_array_read_write_op.cc | 2 +- .../fluid/operators/controlflow/while_op.cc | 3 +- paddle/fluid/operators/conv_op.cc | 3 +- paddle/fluid/operators/conv_op.h | 20 +- paddle/fluid/operators/conv_op_mlu.cc | 10 +- paddle/fluid/operators/correlation_op.cc | 8 +- paddle/fluid/operators/crop_op_npu.cc | 2 +- paddle/fluid/operators/cumsum_op_npu.cc | 9 +- paddle/fluid/operators/data_norm_op.cu | 9 +- paddle/fluid/operators/decode_jpeg_op.cc | 5 +- paddle/fluid/operators/detection/bbox_util.h | 3 +- .../detection/density_prior_box_op_npu.cc | 9 +- .../detection/iou_similarity_op_npu.cc | 2 +- .../fluid/operators/detection/prior_box_op.cc | 4 +- .../fluid/operators/dlnne/dlnne_engine_op.h | 2 +- paddle/fluid/operators/dropout_op_npu.cc | 21 +- paddle/fluid/operators/eig_op.h | 2 +- paddle/fluid/operators/eigvals_op.h | 9 +- .../elementwise/elementwise_div_op.h | 5 +- .../elementwise/elementwise_div_op_npu.cc | 4 +- .../elementwise/elementwise_mul_op.h | 5 +- .../operators/elementwise/elementwise_op.h | 25 ++- .../mkldnn/elementwise_add_mkldnn_op.cc | 9 +- .../mkldnn/elementwise_sub_mkldnn_op.cc | 9 +- paddle/fluid/operators/empty_op.h | 4 +- paddle/fluid/operators/expand_v2_op_npu.cc | 10 +- .../fluid/operators/fill_any_like_op_npu.cc | 2 +- .../fill_constant_batch_size_like_op.h | 7 +- .../fill_constant_batch_size_like_op_npu.cc | 8 +- paddle/fluid/operators/fill_constant_op.h | 13 +- .../fluid/operators/fill_constant_op_mlu.cc | 5 +- .../fluid/operators/fill_constant_op_npu.cc | 2 +- paddle/fluid/operators/fill_diagonal_op.cc | 4 +- .../operators/fill_diagonal_tensor_op.cc | 3 +- paddle/fluid/operators/fill_op.h | 6 +- .../fluid/operators/fused/conv_fusion_op.cu | 4 +- .../operators/fused/fused_attention_op.cc | 4 +- .../operators/fused/fused_bn_activation_op.cc | 13 +- .../operators/fused/fused_bn_activation_op.cu | 4 +- .../fused/fused_bn_add_activation_op.cc | 6 +- .../fused/fused_bn_add_activation_op.cu | 6 +- .../fused/fused_elemwise_activation_op.cc | 4 +- .../fused_embedding_eltwise_layernorm_op.cc | 2 +- .../fused_embedding_eltwise_layernorm_op.cu | 7 +- .../operators/fused/fused_feedforward_op.cc | 2 +- .../fused/mkldnn/fusion_gru_mkldnn_op.cc | 6 +- .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 8 +- .../fluid/operators/fused/resnet_unit_op.cc | 6 +- paddle/fluid/operators/gather_nd_op.cu | 4 +- paddle/fluid/operators/gather_nd_op.h | 4 +- paddle/fluid/operators/gather_nd_op_npu.cc | 2 +- paddle/fluid/operators/gather_nd_op_xpu.cc | 2 +- paddle/fluid/operators/gather_op.cu | 11 +- paddle/fluid/operators/gather_op.h | 11 +- paddle/fluid/operators/gather_op_xpu.cc | 6 +- .../fluid/operators/gaussian_random_op_npu.cc | 3 +- paddle/fluid/operators/graph_send_recv_op.cu | 4 +- paddle/fluid/operators/graph_send_recv_op.h | 4 +- paddle/fluid/operators/group_norm_op.cc | 3 +- paddle/fluid/operators/increment_op_npu.cc | 2 +- paddle/fluid/operators/index_sample_op.cu | 4 +- paddle/fluid/operators/index_sample_op.h | 7 +- paddle/fluid/operators/index_sample_op_npu.cc | 4 +- paddle/fluid/operators/index_select_op.cu | 4 +- paddle/fluid/operators/index_select_op.h | 4 +- paddle/fluid/operators/index_select_op_npu.cc | 3 +- paddle/fluid/operators/inplace_abn_op.cc | 20 +- 
paddle/fluid/operators/instance_norm_op.cc | 6 +- .../fluid/operators/interpolate_v2_op_npu.cc | 5 +- paddle/fluid/operators/ipu/ipu_runtime_op.cc | 2 +- paddle/fluid/operators/isclose_op.h | 22 ++- paddle/fluid/operators/isfinite_op.cc | 6 +- paddle/fluid/operators/isfinite_v2_op.cc | 6 +- paddle/fluid/operators/kron_op.cc | 10 +- paddle/fluid/operators/layer_norm_op.cu | 20 +- paddle/fluid/operators/layer_norm_op_npu.cc | 117 +++++++---- paddle/fluid/operators/linspace_op.cu | 8 +- paddle/fluid/operators/linspace_op.h | 8 +- paddle/fluid/operators/load_combine_op.h | 3 +- paddle/fluid/operators/load_op.h | 3 +- .../fluid/operators/lod_tensor_to_array_op.cc | 3 +- paddle/fluid/operators/lookup_table_op.h | 3 +- paddle/fluid/operators/lookup_table_v2_op.cu | 6 +- paddle/fluid/operators/lookup_table_v2_op.h | 9 +- .../fluid/operators/lookup_table_v2_op_npu.cc | 3 +- paddle/fluid/operators/lstsq_op.h | 2 +- paddle/fluid/operators/lu_op.h | 7 +- .../operators/margin_cross_entropy_op.cu | 24 ++- .../fluid/operators/math/beam_search_npu.cc | 112 ++++++----- paddle/fluid/operators/math/cross_entropy.cc | 4 +- paddle/fluid/operators/math/cross_entropy.cu | 4 +- .../operators/math/eigen_values_vectors.h | 6 +- .../fluid/operators/math/matrix_solve.cu.cc | 2 +- paddle/fluid/operators/matmul_op.cc | 5 +- paddle/fluid/operators/matmul_op_npu.cc | 22 +-- paddle/fluid/operators/matmul_v2_op.cc | 10 +- paddle/fluid/operators/matrix_power_op.h | 2 +- paddle/fluid/operators/mean_op_mlu.cc | 27 +-- paddle/fluid/operators/mean_op_npu.cc | 6 +- paddle/fluid/operators/memcpy_h2d_op.h | 2 +- paddle/fluid/operators/meshgrid_op.cc | 2 +- paddle/fluid/operators/meshgrid_op_npu.cc | 2 +- .../operators/metrics/accuracy_op_npu.cc | 28 +-- .../operators/mkldnn/concat_mkldnn_op.cc | 12 +- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 24 ++- .../operators/mkldnn/dequantize_mkldnn_op.cc | 4 +- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 10 +- .../operators/mkldnn/matmul_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 7 +- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 6 +- .../operators/mkldnn/requantize_mkldnn_op.cc | 6 +- .../operators/mkldnn/reshape_mkldnn_op.cc | 60 +++--- .../fluid/operators/mkldnn/slice_mkldnn_op.cc | 17 +- .../fluid/operators/mkldnn/split_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/stack_mkldnn_op.cc | 3 +- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 4 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 5 +- paddle/fluid/operators/mul_op_npu.cc | 12 +- paddle/fluid/operators/one_hot_op_npu.cc | 3 +- paddle/fluid/operators/one_hot_v2_op_npu.cc | 3 +- .../fluid/operators/optimizers/adam_op_npu.cc | 12 +- .../operators/optimizers/rmsprop_op_npu.cc | 6 +- paddle/fluid/operators/optimizers/sgd_op.cc | 5 +- .../operators/optimizers/sparse_momentum_op.h | 6 +- paddle/fluid/operators/partial_concat_op.cc | 2 +- paddle/fluid/operators/partial_sum_op.cc | 2 +- .../pscore/send_and_recv_op_gpu_test.cc | 14 +- paddle/fluid/operators/put_along_axis_op.cu | 4 +- paddle/fluid/operators/put_along_axis_op.h | 4 +- paddle/fluid/operators/pyramid_hash_op.cc | 3 +- paddle/fluid/operators/randint_op.cu | 3 +- .../fluid/operators/reader/buffered_reader.cc | 9 +- paddle/fluid/operators/reader/read_op.cc | 3 +- paddle/fluid/operators/recurrent_op.cc | 9 +- .../reduce_ops/mkldnn/reduce_mkldnn_op.h | 20 +- .../operators/reduce_ops/reduce_max_op_npu.cc | 9 +- .../reduce_ops/reduce_mean_op_mlu.cc | 24 ++- .../operators/reduce_ops/reduce_min_op_npu.cc | 8 +- paddle/fluid/operators/reduce_ops/reduce_op.h | 
40 ++-- .../reduce_ops/reduce_prod_op_npu.cc | 6 +- .../operators/reduce_ops/reduce_sum_op.h | 5 +- .../operators/reduce_ops/reduce_sum_op_npu.cc | 15 +- .../fluid/operators/repeat_interleave_op.cu | 6 +- paddle/fluid/operators/repeat_interleave_op.h | 6 +- .../fluid/operators/rnn_memory_helper_op.cc | 2 +- paddle/fluid/operators/roi_align_op_npu.cc | 7 +- paddle/fluid/operators/save_combine_op.h | 3 +- paddle/fluid/operators/save_op.h | 2 +- paddle/fluid/operators/scale_op_mlu.cc | 18 +- paddle/fluid/operators/scale_op_npu.cc | 6 +- paddle/fluid/operators/scatter_nd_add_op.cc | 5 +- paddle/fluid/operators/scatter_nd_add_op.cu | 4 +- paddle/fluid/operators/scatter_nd_add_op.h | 4 +- paddle/fluid/operators/scatter_op.cu | 4 +- paddle/fluid/operators/scatter_op.h | 4 +- paddle/fluid/operators/scatter_op_npu.cc | 6 +- paddle/fluid/operators/scatter_op_xpu.cc | 2 +- paddle/fluid/operators/searchsorted_op.h | 4 +- paddle/fluid/operators/segment_pool_op.h | 4 +- .../sequence_ops/sequence_mask_op_npu.cc | 7 +- paddle/fluid/operators/set_value_op.h | 17 +- paddle/fluid/operators/set_value_op_npu.cc | 3 +- paddle/fluid/operators/shard_index_op_npu.cc | 2 +- .../fluid/operators/shrink_rnn_memory_op.cc | 4 +- paddle/fluid/operators/slice_op.cc | 7 +- .../fluid/operators/smooth_l1_loss_op_npu.cc | 22 +-- .../operators/softmax_with_cross_entropy_op.h | 3 +- paddle/fluid/operators/solve_op.h | 22 +-- paddle/fluid/operators/sparse_attention_op.cu | 6 +- paddle/fluid/operators/spectral_op.cc | 11 +- paddle/fluid/operators/spectral_op.cu | 22 ++- paddle/fluid/operators/spectral_op.h | 6 +- paddle/fluid/operators/split_op_mlu.cc | 8 +- paddle/fluid/operators/strided_slice_op.cc | 4 +- paddle/fluid/operators/sum_op.cc | 16 +- paddle/fluid/operators/sum_op_mlu.cc | 8 +- .../fluid/operators/sync_batch_norm_op.cu.h | 7 +- .../fluid/operators/sync_batch_norm_op_npu.cc | 21 +- paddle/fluid/operators/take_along_axis_op.cu | 4 +- paddle/fluid/operators/take_along_axis_op.h | 4 +- paddle/fluid/operators/tdm_child_op.h | 6 +- paddle/fluid/operators/tdm_sampler_op.h | 9 +- paddle/fluid/operators/tensor_formatter.cc | 4 +- .../operators/tensorrt/tensorrt_engine_op.h | 2 +- paddle/fluid/operators/top_k_op_npu.cc | 5 +- paddle/fluid/operators/top_k_v2_op_npu.cc | 5 +- paddle/fluid/operators/transfer_layout_op.h | 3 +- paddle/fluid/operators/transpose_op.cc | 3 +- paddle/fluid/operators/tril_triu_op_npu.cc | 3 +- .../truncated_gaussian_random_op_npu.cc | 13 +- paddle/fluid/operators/uniform_random_op.h | 17 +- .../fluid/operators/uniform_random_op_npu.cc | 2 +- paddle/fluid/operators/unique_op.h | 2 +- paddle/fluid/operators/unsqueeze_op.cc | 6 +- paddle/fluid/operators/utils.h | 16 +- paddle/fluid/operators/where_index_op_npu.cc | 3 +- paddle/fluid/operators/where_op_npu.cc | 2 +- .../platform/device/gpu/cuda/cudnn_desc.h | 12 +- .../platform/device/gpu/rocm/miopen_desc.h | 9 +- .../fluid/platform/device/ipu/ipu_compiler.cc | 7 +- .../fluid/platform/device/ipu/ipu_executor.cc | 3 +- paddle/fluid/platform/device/ipu/ipu_utils.h | 3 +- .../platform/device/npu/npu_op_runner.cc | 21 +- .../fluid/platform/device/npu/npu_op_runner.h | 3 +- paddle/fluid/platform/lodtensor_printer.cc | 47 ++--- paddle/fluid/platform/mkldnn_reuse.h | 6 +- paddle/fluid/pybind/eager.cc | 4 +- paddle/fluid/pybind/eager_functions.cc | 3 +- paddle/fluid/pybind/eager_method.cc | 6 +- paddle/fluid/pybind/eager_properties.cc | 3 +- paddle/fluid/pybind/imperative.cc | 34 ++-- paddle/fluid/pybind/pybind.cc | 30 ++- paddle/fluid/pybind/reader_py.cc | 3 +- 
paddle/fluid/pybind/tensor_py.h | 25 +-- paddle/pten/api/lib/utils/tensor_utils.cc | 8 +- paddle/pten/core/CMakeLists.txt | 4 +- paddle/pten/core/compat/convert_utils.cc | 162 --------------- paddle/pten/core/compat/convert_utils.h | 10 - paddle/pten/core/dense_tensor.inl | 16 +- paddle/pten/core/dense_tensor_impl.cc | 28 ++- paddle/pten/kernels/cpu/copy_kernel.cc | 3 +- paddle/pten/kernels/funcs/math_function.cc | 10 +- paddle/pten/kernels/funcs/math_function.cu | 3 +- paddle/pten/kernels/funcs/math_function.h | 1 + .../pten/kernels/funcs/math_function_impl.h | 2 +- paddle/pten/kernels/gpu/copy_kernel.cu | 3 +- paddle/pten/kernels/xpu/copy_kernel.cc | 3 +- paddle/pten/tests/core/test_convert_utils.cc | 61 +++--- .../tests/custom_op/custom_raw_op_kernel_op.h | 4 +- 352 files changed, 2175 insertions(+), 1445 deletions(-) create mode 100644 paddle/fluid/framework/convert_utils.cc create mode 100644 paddle/fluid/framework/convert_utils.h mode change 100755 => 100644 paddle/fluid/operators/gaussian_random_op_npu.cc mode change 100755 => 100644 paddle/fluid/operators/top_k_v2_op_npu.cc diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc index e0b755c66bb17..40b0a8b55e17a 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc @@ -562,7 +562,7 @@ bool DistModel::FetchResults(std::vector *output_data, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(output_data->at(i)); output->name = idx_to_fetches_[idx]; bool rst = false; diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc index 23b2f5545ffc2..d1ef8b89ff092 100644 --- a/paddle/fluid/distributed/ps/service/brpc_utils.cc +++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc @@ -13,8 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/distributed/ps/service/brpc_utils.h" + #include #include + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -98,25 +101,29 @@ void SerializeLodTensor(framework::Variable* var, } } } - var_msg->set_data_type(static_cast(tensor->type())); + var_msg->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer if (platform::is_cpu_place(tensor->place())) { - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(tensor->data()), data_len); } else { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(temp_ptr), data_len); delete[] temp_ptr; @@ -139,25 +146,29 @@ void SerializeSelectedRows(framework::Variable* var, var_data->resize(rows->size() * sizeof(int64_t)); char* data_ptr = const_cast(var_data->data()); memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t)); - var_msg->set_data_type(static_cast(tensor->type())); + var_msg->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer if (platform::is_cpu_place(tensor->place())) { - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(tensor->data()), data_len); } else { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); - auto data_len = tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); + auto data_len = tensor->numel() * framework::DataTypeSize(tensor->dtype()); iobuf->append(reinterpret_cast(&data_len), 8); iobuf->append(reinterpret_cast(temp_ptr), data_len); delete[] temp_ptr; @@ -225,8 +236,9 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, VarMessageToVarType(msg.data_type())); + void* tensor_data = tensor->mutable_data( + place, + framework::TransToPtenDataType(VarMessageToVarType(msg.data_type()))); // 
IO Buffer if (platform::is_cpu_place(place)) { @@ -236,15 +248,16 @@ void DeserializeLodTensor(framework::Variable* var, const VarMsg& msg, } else if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA unsigned long data_len; // NOLINT - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT - io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT - io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT + io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT + io_buffer_itr.copy_and_forward((void*)temp_ptr, data_len); // NOLINT auto stream = reinterpret_cast(ctx).stream(); memory::Copy( place, tensor_data, platform::CPUPlace(), (void*)temp_ptr, // NOLINT - tensor->numel() * framework::SizeOfType(tensor->type()), stream); + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); delete[] temp_ptr; #endif } @@ -266,8 +279,9 @@ void DeserializeSelectedRows( vec_dim.push_back(x); } tensor->Resize(framework::make_ddim(vec_dim)); - void* tensor_data = - tensor->mutable_data(place, VarMessageToVarType(msg.data_type())); + void* tensor_data = tensor->mutable_data( + place, + framework::TransToPtenDataType(VarMessageToVarType(msg.data_type()))); // IO Buffer if (platform::is_cpu_place(place)) { unsigned long data_len; // NOLINT @@ -275,15 +289,16 @@ void DeserializeSelectedRows( io_buffer_itr.copy_and_forward(tensor_data, data_len); } else if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA - char* temp_ptr = new char[tensor->numel() * - framework::SizeOfType(tensor->type())]; // NOLINT - unsigned long data_len; // NOLINT - io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT + char* temp_ptr = + new char[tensor->numel() * + framework::DataTypeSize(tensor->dtype())]; // NOLINT + unsigned long data_len; // NOLINT + io_buffer_itr.copy_and_forward((void*)(&data_len), 8); // NOLINT io_buffer_itr.copy_and_forward(temp_ptr, data_len); auto stream = reinterpret_cast(ctx).stream(); memory::Copy(place, tensor_data, platform::CPUPlace(), temp_ptr, - tensor->numel() * framework::SizeOfType(tensor->type()), + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); delete[] temp_ptr; #endif diff --git a/paddle/fluid/distributed/ps/service/heter_client.cc b/paddle/fluid/distributed/ps/service/heter_client.cc index e9e3ec1d9df47..8aebae237360e 100644 --- a/paddle/fluid/distributed/ps/service/heter_client.cc +++ b/paddle/fluid/distributed/ps/service/heter_client.cc @@ -13,6 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/service/heter_client.h" + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/string/split.h" @@ -39,13 +41,13 @@ int GetMicroId(const platform::DeviceContext& ctx, } else { #ifdef PADDLE_WITH_CUDA std::vector temp; - temp.resize(tensor->numel() * framework::SizeOfType(tensor->type())); + temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype())); char* temp_ptr = temp.data(); auto stream = reinterpret_cast(ctx).stream(); memory::Copy( platform::CPUPlace(), temp_ptr, tensor->place(), tensor->data(), - tensor->numel() * framework::SizeOfType(tensor->type()), stream); + tensor->numel() * framework::DataTypeSize(tensor->dtype()), stream); float* temp_ptr_float = reinterpret_cast(temp_ptr); micro_id = static_cast(temp_ptr_float[0]); #endif diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 8bfeaf47b23c3..56fdf542bbb93 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/eager/grad_tensor_holder.h" #include "paddle/fluid/imperative/gradient_accumulator.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/pten/kernels/funcs/math_function.h" diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 95c814380e3d3..a3f0ed392646c 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -452,4 +452,10 @@ endif() cc_test(scope_guard_test SRCS scope_guard_test.cc) cc_test(pten_utils_test SRCS pten_utils_test.cc DEPS pten_utils) + +if(WITH_GPU OR WITH_ROCM) + cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) +else() + cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place) +endif() cc_test(custom_kernel_test SRCS custom_kernel_test.cc DEPS custom_kernel pten_tensor) diff --git a/paddle/fluid/framework/convert_utils.cc b/paddle/fluid/framework/convert_utils.cc new file mode 100644 index 0000000000000..78dd030698de3 --- /dev/null +++ b/paddle/fluid/framework/convert_utils.cc @@ -0,0 +1,185 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" +// See Note [ Why still include the fluid headers? 
] +#include "paddle/fluid/platform/device/gpu/gpu_info.h" + +namespace paddle { +namespace framework { + +paddle::experimental::DataType TransToPtenDataType( + const paddle::framework::proto::VarType::Type& dtype) { + // Set the order of case branches according to the frequency with + // the data type is used + switch (dtype) { + case paddle::framework::proto::VarType::FP32: + return DataType::FLOAT32; + case paddle::framework::proto::VarType::FP64: + return DataType::FLOAT64; + case paddle::framework::proto::VarType::INT64: + return DataType::INT64; + case paddle::framework::proto::VarType::INT32: + return DataType::INT32; + case paddle::framework::proto::VarType::INT8: + return DataType::INT8; + case paddle::framework::proto::VarType::UINT8: + return DataType::UINT8; + case paddle::framework::proto::VarType::INT16: + return DataType::INT16; + case paddle::framework::proto::VarType::COMPLEX64: + return DataType::COMPLEX64; + case paddle::framework::proto::VarType::COMPLEX128: + return DataType::COMPLEX128; + case paddle::framework::proto::VarType::FP16: + return DataType::FLOAT16; + case paddle::framework::proto::VarType::BF16: + return DataType::BFLOAT16; + case paddle::framework::proto::VarType::BOOL: + return DataType::BOOL; + default: + return DataType::UNDEFINED; + } +} + +paddle::framework::proto::VarType::Type TransToProtoVarType( + const paddle::experimental::DataType& dtype) { + // Set the order of case branches according to the frequency with + // the data type is used + switch (dtype) { + case DataType::FLOAT32: + return paddle::framework::proto::VarType::FP32; + case DataType::FLOAT64: + return paddle::framework::proto::VarType::FP64; + case DataType::INT64: + return paddle::framework::proto::VarType::INT64; + case DataType::INT32: + return paddle::framework::proto::VarType::INT32; + case DataType::INT8: + return paddle::framework::proto::VarType::INT8; + case DataType::UINT8: + return paddle::framework::proto::VarType::UINT8; + case DataType::INT16: + return paddle::framework::proto::VarType::INT16; + case DataType::COMPLEX64: + return paddle::framework::proto::VarType::COMPLEX64; + case DataType::COMPLEX128: + return paddle::framework::proto::VarType::COMPLEX128; + case DataType::FLOAT16: + return paddle::framework::proto::VarType::FP16; + case DataType::BFLOAT16: + return paddle::framework::proto::VarType::BF16; + case DataType::BOOL: + return paddle::framework::proto::VarType::BOOL; + default: + PADDLE_THROW(paddle::platform::errors::Unimplemented( + "Unsupported data type `%s` when casting it into " + "paddle data type.", + dtype)); + } +} + +size_t DataTypeSize(DataType dtype) { + switch (dtype) { + case DataType::UNDEFINED: + return 0; + case DataType::BOOL: + return sizeof(bool); + case DataType::INT8: + return sizeof(int8_t); + case DataType::UINT8: + return sizeof(uint8_t); + case DataType::INT16: + return sizeof(int16_t); + case DataType::INT32: + return sizeof(int); + case DataType::INT64: + return sizeof(int64_t); + case DataType::BFLOAT16: + return sizeof(paddle::platform::bfloat16); + case DataType::FLOAT16: + return sizeof(paddle::platform::float16); + case DataType::FLOAT32: + return sizeof(float); + case DataType::FLOAT64: + return sizeof(double); + case DataType::COMPLEX64: + return sizeof(paddle::platform::complex); + case DataType::COMPLEX128: + return sizeof(paddle::platform::complex); + default: + return 0; + } +} + +DataType String2DataType(const std::string& str) { + if (str == "bool") { + return DataType::BOOL; + } else if (str == "float16") { + 
return DataType::FLOAT16; + } else if (str == "float32") { + return DataType::FLOAT32; + } else if (str == "float64") { + return DataType::FLOAT64; + } else if (str == "int8") { + return DataType::INT8; + } else if (str == "int16") { + return DataType::INT16; + } else if (str == "int32") { + return DataType::INT32; + } else if (str == "int64") { + return DataType::INT64; + } else if (str == "uint8") { + return DataType::UINT8; + } else if (str == "complex64") { + return DataType::COMPLEX64; + } else if (str == "complex128") { + return DataType::COMPLEX128; + } else { + return DataType::UNDEFINED; + } +} + +std::string DataType2String(DataType dtype) { + switch (dtype) { + case DataType::BOOL: + return "bool"; + case DataType::INT8: + return "int8"; + case DataType::UINT8: + return "uint8"; + case DataType::INT16: + return "int16"; + case DataType::INT32: + return "int32"; + case DataType::INT64: + return "int64"; + case DataType::FLOAT16: + return "float16"; + case DataType::FLOAT32: + return "float32"; + case DataType::FLOAT64: + return "float64"; + case DataType::COMPLEX64: + return "complex64"; + case DataType::COMPLEX128: + return "complex128"; + default: + PADDLE_THROW(paddle::platform::errors::InvalidArgument( + "Unknow pten::DataType, the int value = %d.", + static_cast(dtype))); + return ""; + } +} +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/convert_utils.h b/paddle/fluid/framework/convert_utils.h new file mode 100644 index 0000000000000..e870e57b72130 --- /dev/null +++ b/paddle/fluid/framework/convert_utils.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/common/backend.h" +#include "paddle/pten/common/data_type.h" +#include "paddle/pten/common/layout.h" +#include "paddle/pten/core/tensor_meta.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/place.h" + +// TODO(chenweihang): this file may need to be removed + +namespace paddle { +namespace framework { + +using DataType = paddle::experimental::DataType; +using DataLayout = paddle::experimental::DataLayout; + +DataType TransToPtenDataType( + const paddle::framework::proto::VarType::Type& dtype); + +paddle::framework::proto::VarType::Type TransToProtoVarType( + const DataType& dtype); + +size_t DataTypeSize(DataType dtype); +DataType String2DataType(const std::string& str); +std::string DataType2String(DataType dtype); + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index d3a4909071e81..63c99ec050f0a 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/attribute.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_meta_info_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -777,12 +778,14 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, std::vector vec_custom_dtype; for (size_t i = 0; i < ctx->InputSize(in_name); ++i) { auto dtype = ctx->GetInputDataType(in_name, i); - vec_custom_dtype.emplace_back(pten::TransToPtenDataType(dtype)); + vec_custom_dtype.emplace_back( + paddle::framework::TransToPtenDataType(dtype)); } vec_input_dtypes.emplace_back(vec_custom_dtype); } else { auto dtype = ctx->GetInputDataType(in_name); - input_dtypes.emplace_back(pten::TransToPtenDataType(dtype)); + input_dtypes.emplace_back( + paddle::framework::TransToPtenDataType(dtype)); } } @@ -794,12 +797,14 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, auto out_name = op_outputs[i]; if (detail::IsDuplicableVar(out_name)) { for (size_t j = 0; j < output_dtypes.size(); ++j) { - auto dtype = pten::TransToProtoVarType(output_dtypes[i]); + auto dtype = + paddle::framework::TransToProtoVarType(output_dtypes[i]); ctx->SetOutputDataType(out_name, dtype, j); } } else { - ctx->SetOutputDataType(out_name, - pten::TransToProtoVarType(output_dtypes[i])); + ctx->SetOutputDataType( + out_name, + paddle::framework::TransToProtoVarType(output_dtypes[i])); } } }; diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index a014d34bcf5f0..1bf6f12e63cbb 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -18,6 +18,7 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_reuse.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -79,10 +80,10 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var, } out->Resize(make_ddim(dst_dim)); - out->mutable_data(expected_kernel_type.place_, in.type()); + out->mutable_data(expected_kernel_type.place_, in.dtype()); framework::VisitDataType( - in.type(), + framework::TransToProtoVarType(in.dtype()), CastDataLayout(pool.Get(expected_kernel_type.place_), axis, in, out)); out->set_layout(expected_kernel_type.data_layout_); @@ -153,11 +154,13 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, auto in_tz = paddle::framework::vectorize(in.dims()); auto out_tz = in_tz; - memory::data_type in_type = ToMKLDNNDataType(in.type()); - PADDLE_ENFORCE_NE(in_type, memory::data_type::undef, - platform::errors::InvalidArgument( - "Input tensor type (%s) is not supported.", - DataTypeToString(in.type()))); + memory::data_type in_type = + ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype())); + PADDLE_ENFORCE_NE( + in_type, memory::data_type::undef, + platform::errors::InvalidArgument( + "Input tensor type (%s) is not supported.", + DataTypeToString(framework::TransToProtoVarType(in.dtype())))); auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); auto out_format = @@ -169,8 +172,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, if ((in_format != out_format) || always_copy) { void* in_data = GetDataFromTensor(in, in_type); - platform::ReorderMKLDNNHandler handler(in_tz, in.type(), in_type, - cpu_engine); + platform::ReorderMKLDNNHandler handler( + in_tz, framework::TransToProtoVarType(in.dtype()), 
in_type, cpu_engine); auto reorder_src_memory_p = handler.AcquireSrcMemory(in_format, in_data); auto reorder_dst_memory_p = diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 8e762b913f2cf..7152004b63de6 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -16,6 +16,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor.h" TEST(DataType, float16) { @@ -27,10 +28,11 @@ TEST(DataType, float16) { Tensor tensor; CPUPlace cpu; - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, f::TransToPtenDataType(dtype)); // test fp16 tensor - EXPECT_EQ(tensor.type(), f::ToDataType(typeid(float16))); + EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()), + f::ToDataType(typeid(float16))); // test fp16 size EXPECT_EQ(f::SizeOfType(dtype), 2u); @@ -49,10 +51,11 @@ TEST(DataType, bfloat16) { Tensor tensor; CPUPlace cpu; - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, f::TransToPtenDataType(dtype)); // test bf16 tensor - EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16))); + EXPECT_EQ(f::TransToProtoVarType(tensor.dtype()), + f::ToDataType(typeid(bfloat16))); // test bf16 size EXPECT_EQ(f::SizeOfType(dtype), 2u); diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 5b6aedd2fe14b..14b5662b24aeb 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/platform/transform.h" @@ -65,12 +66,14 @@ struct CastDataType { void TransDataType(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, const Tensor& in, Tensor* out) { - PADDLE_ENFORCE_EQ(in.type(), kernel_type_for_var.data_type_, - platform::errors::InvalidArgument( - "The src dtype(%s) of input tensor and kernel_type(%s) " - "are not conststent.", - DataTypeToString(in.type()), - DataTypeToString(kernel_type_for_var.data_type_))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(in.dtype()), + kernel_type_for_var.data_type_, + platform::errors::InvalidArgument( + "The src dtype(%s) of input tensor and kernel_type(%s) " + "are not conststent.", + DataTypeToString(framework::TransToProtoVarType(in.dtype())), + DataTypeToString(kernel_type_for_var.data_type_))); auto dst_type = expected_kernel_type.data_type_; TransDataType(in, dst_type, out); } @@ -81,7 +84,7 @@ void TransDataType(const Tensor& in, platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); out->Resize(in.dims()); - auto src_type = in.type(); + auto src_type = framework::TransToProtoVarType(in.dtype()); auto dst_type = type; auto ctx = pool.Get(in.place()); diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 633963d1793d3..1facbe850ee52 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/all_reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/platform/place.h" @@ -127,7 +128,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::PreconditionNotMet( "The numel of tensor %s should be > 0, but got numel is %d.", in_var_handles[i]->name(), numel)); - dtype = lod_tensor.type(); + dtype = framework::TransToProtoVarType(lod_tensor.dtype()); is_gpu_place = platform::is_gpu_place(lod_tensor.place()); #if defined(PADDLE_WITH_XPU_BKCL) is_xpu_place = platform::is_xpu_place(lod_tensor.place()); @@ -139,7 +140,7 @@ void AllReduceOpHandle::AllReduceImpl( "The size of tensors of the same variable in different local " "scopes should be equal.")); PADDLE_ENFORCE_EQ( - dtype, lod_tensor.type(), + dtype, framework::TransToProtoVarType(lod_tensor.dtype()), platform::errors::PreconditionNotMet( "The dtype of tensors of the same variable in different local " "scopes should be equal.")); @@ -227,14 +228,15 @@ void AllReduceOpHandle::AllReduceFunc( // Reduce All Tensor to trg in CPU ReduceBufferData func(lod_tensor_data, trg.data(), numel); - VisitDataType(trg.type(), func); + VisitDataType(framework::TransToProtoVarType(trg.dtype()), func); for (size_t i = 1; i < local_exec_scopes_.size(); ++i) { auto &scope = local_exec_scopes_[i]; auto &p = places[i]; auto *var = scope->FindVar(out_var_names[i]); - size_t size = numel * SizeOfType(trg.type()); + size_t size = + numel * SizeOfType(framework::TransToProtoVarType(trg.dtype())); RunAndRecordEvent(p, [&trg, var, p, size] { auto dst_ptr = var->GetMutable()->data(); platform::CPUPlace cpu_place; diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index e8fa500e094b3..d058949ec6a19 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/broadcast_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/place.h" @@ -87,7 +88,8 @@ void BroadcastOpHandle::BroadcastOneVar( int root_id = in_tensor.place().device; std::vector> broadcast_calls; - int type = platform::ToNCCLDataType(in_tensor.type()); + int type = platform::ToNCCLDataType( + framework::TransToProtoVarType(in_tensor.dtype())); size_t numel = static_cast(in_tensor.numel()); for (auto out_var_handle : out_var_handles) { @@ -147,7 +149,8 @@ void BroadcastOpHandle::BroadcastOneVar( int root_id = in_tensor.place().device; std::vector> broadcast_calls; - int type = platform::ToBKCLDataType(in_tensor.type()); + int type = platform::ToBKCLDataType( + framework::TransToProtoVarType(in_tensor.dtype())); size_t numel = static_cast(in_tensor.numel()); for (auto out_var_handle : out_var_handles) { @@ -239,7 +242,7 @@ void BroadcastOpHandle::InitOutputValue( } VariableVisitor::ShareDimsAndLoD(*in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data(t_out_p, - in_tensor.type()); + in_tensor.dtype()); } } diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index 69741bd3c9719..69c39acc5fe59 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc 
+++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -49,7 +50,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor, if (tensor->numel() && tensor->IsInitialized()) { // step1: check type PADDLE_ENFORCE_EQ( - type, tensor->type(), + type, framework::TransToProtoVarType(tensor->dtype()), platform::errors::InvalidArgument( "The data type of fetched Tensors or the items of fetched " "LoDTensorArray are different from each other on different " @@ -57,7 +58,7 @@ static void CheckTensorAttrs(const LoDTensor *tensor, "(th) fetched variable. Please set the " "parameter `return_merged = False` when you " "call the `Executor.run()` method.", - DataTypeToString(type), DataTypeToString(tensor->type()), offset)); + DataTypeToString(type), tensor->dtype(), offset)); // step2: check layout PADDLE_ENFORCE_EQ( @@ -139,7 +140,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( for (auto *t : src_lodtensors) { if (t->numel() && t->IsInitialized()) { check_dim = t->dims(); - new_type = t->type(); + new_type = paddle::framework::TransToProtoVarType(t->dtype()); new_layout = t->layout(); break; } @@ -169,10 +170,10 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( dst_lodtensor->set_lod(src_lodtensors[0]->lod()); if (platform::is_gpu_place(src_lodtensors[0]->place())) { dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(), - src_lodtensors[0]->type()); + src_lodtensors[0]->dtype()); } else { dst_lodtensor->mutable_data(platform::CPUPlace(), - src_lodtensors[0]->type()); + src_lodtensors[0]->dtype()); } // slice and memcpy diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index af1b73f40be53..8f76de2393eaa 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/device_memory_aligment.h" @@ -337,7 +338,8 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( size_t size_of_dtype = 0; for (size_t i = 0; i < grad_tensor.size(); ++i) { // Get dtype - auto ele_dtype = grad_tensor.at(i).second->type(); + auto ele_dtype = + framework::TransToProtoVarType(grad_tensor.at(i).second->dtype()); if (i == 0) { *dtype = ele_dtype; size_of_dtype = framework::SizeOfType(ele_dtype); diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index 430f55793b736..506f4bc5af2da 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -115,7 +115,7 @@ void GatherOpHandle::RunImpl() { DDim out_dim = pre_in_value.GetCompleteDims(); out_dim[0] = static_cast(rows); out_value->mutable_value()->Resize(out_dim).mutable_data( - t_out_p, pre_in_value.value().type()); + t_out_p, pre_in_value.value().dtype()); Tensor *out_tensor = out_value->mutable_value(); // copy diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index f57136e1f0ed9..2c0d918287a52 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -19,6 +19,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -307,7 +308,7 @@ void tensor_check(const std::string& op_type, const platform::Place& place) { TensorCheckerVisitor vistor(op_type, var_name, tensor, place); - VisitDataType(tensor.type(), vistor); + VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor); } void CheckVarHasNanOrInf(const std::string& op_type, @@ -348,7 +349,8 @@ void CheckVarHasNanOrInf(const std::string& op_type, return; } else if (platform::is_xpu_place(tensor->place())) { #ifdef PADDLE_WITH_XPU - if (tensor->type() != proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) { return; } @@ -377,14 +379,15 @@ void CheckVarHasNanOrInf(const std::string& op_type, return; } else if (platform::is_npu_place(tensor->place())) { #ifdef PADDLE_WITH_ASCEND_CL - if (tensor->type() != proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) { return; } framework::LoDTensor cpu_tensor; cpu_tensor.Resize(tensor->dims()); float* cpu_data = static_cast( - cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type())); + cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype())); framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor); bool flag = false; @@ -475,8 +478,10 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name, return; } - if ((tensor->type() != proto::VarType::FP32) && - (tensor->type() != proto::VarType::FP16)) { + if ((framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP32) && + (framework::TransToProtoVarType(tensor->dtype()) != + proto::VarType::FP16)) { return; } @@ -490,16 +495,17 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name, framework::Tensor cpu_tensor; 
cpu_tensor.Resize(tensor->dims()); - cpu_tensor.mutable_data(platform::CPUPlace(), tensor->type()); + cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()); framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor); LOG(WARNING) << "print [" << var_name << "] tensor info:"; // use env strategy control in future, -1=print_all. int print_num = 3; - if (tensor->type() == proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) { const float* value = cpu_tensor.data(); PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false); - } else if (tensor->type() == proto::VarType::FP16) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::FP16) { const paddle::platform::float16* value = cpu_tensor.data(); PrintNanInf(value, tensor->numel(), print_num, op_type, var_name, false); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index bf38a56dc9372..f6a97160d8271 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -19,6 +19,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -208,7 +209,7 @@ void tensor_check(const std::string& op_type, TensorCheckerVisitor vistor(op_type, var_name, tensor, place); - VisitDataType(tensor.type(), vistor); + VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor); } } // namespace details diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index 6d136055da782..66ea7a15815ff 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -130,7 +130,8 @@ struct GatherLocalSelectedRowsFunctor { DDim out_dim = pre_in->GetCompleteDims(); out_dim[0] = static_cast(rows); dst_tensor.mutable_value()->Resize(out_dim); - dst_tensor.mutable_value()->mutable_data(out_place, pre_in->value().type()); + dst_tensor.mutable_value()->mutable_data(out_place, + pre_in->value().dtype()); } void operator()() { diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index 5cf84a04958b8..4df42a7d93d19 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/reduce_op_handle.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -150,7 +151,8 @@ void ReduceOpHandle::RunImpl() { if (!FLAGS_cpu_deterministic) { ReduceLoDTensor func(lod_tensors, out_var->GetMutable()); - VisitDataType(lod_tensors[0]->type(), func); + VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()), + func); } else { // We sum lod_tensors to reduce_sum_trg which is in local_scopes_0 // here, but it doesn't mean reduce_sum_trg must be in local_scopes_0. 
@@ -158,7 +160,8 @@ void ReduceOpHandle::RunImpl() { ->FindVar(out_var_handle->name()) ->GetMutable(); ReduceLoDTensor func(lod_tensors, &reduce_sum_trg); - VisitDataType(lod_tensors[0]->type(), func); + VisitDataType(framework::TransToProtoVarType(lod_tensors[0]->dtype()), + func); auto trg = out_var->GetMutable(); if (reduce_sum_trg.data() != trg->data()) { @@ -171,7 +174,7 @@ void ReduceOpHandle::RunImpl() { auto pre_in = pre_in_var->Get(); VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data( - out_var_handle->place(), pre_in.type()); + out_var_handle->place(), pre_in.dtype()); auto out_p = out_var_handle->place(); int root_id = out_p.device; @@ -191,7 +194,8 @@ void ReduceOpHandle::RunImpl() { out_var_handle->place()); } - int type = platform::ToNCCLDataType(lod_tensor.type()); + int type = platform::ToNCCLDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back( [buffer, recvbuffer, type, numel, root_id, &nccl_ctx] { @@ -217,7 +221,7 @@ void ReduceOpHandle::RunImpl() { auto pre_in = pre_in_var->Get(); VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var); VariableVisitor::GetMutableTensor(out_var).mutable_data( - out_var_handle->place(), pre_in.type()); + out_var_handle->place(), pre_in.dtype()); auto out_p = out_var_handle->place(); int root_id = out_p.device; @@ -237,7 +241,8 @@ void ReduceOpHandle::RunImpl() { out_var_handle->place()); } - int type = platform::ToBKCLDataType(lod_tensor.type()); + int type = platform::ToBKCLDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id, &bkcl_ctx] { diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc index 1ab944720f8f4..f0de723c20b74 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc @@ -17,6 +17,7 @@ #include #include "dgc/dgc.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -151,7 +152,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto &out = *outs[i]; float *out_tensor_buf = out.data(); - dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype; + dtype = (dtype == -1) ? platform::ToNCCLDataType( + framework::TransToProtoVarType(in.dtype())) + : dtype; in_numel = (in_numel == 0) ? static_cast(in.numel()) : in_numel; PADDLE_ENFORCE_EQ(in_numel % 2, 0, platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index 9979d2ee20531..cc47fe50dc8bb 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/details/variable_visitor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/selected_rows_utils.h" namespace pten { @@ -115,7 +116,7 @@ struct EnforceShapeAndDTypeEQVisitor { "The place type of the two variables is not equal. 
The src place " "is %s, but the dst place is %s", src.place().DebugString(), tensor.place().DebugString())); - PADDLE_ENFORCE_EQ(src.type(), tensor.type(), + PADDLE_ENFORCE_EQ(src.dtype(), tensor.dtype(), platform::errors::PreconditionNotMet( "The dtype of the two variables is not equal.")); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc index e191979c50522..8de74b2c477a1 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -14,6 +14,8 @@ limitations under the License. */ #include "paddle/fluid/framework/device_worker.h" +#include "paddle/fluid/framework/convert_utils.h" + namespace pten { class DenseTensor; } // namespace pten @@ -58,11 +60,13 @@ std::string PrintLodTensorIntType(Tensor* tensor, int64_t start, int64_t end) { std::string PrintLodTensor(Tensor* tensor, int64_t start, int64_t end) { std::string out_val; - if (tensor->type() == proto::VarType::FP32) { + if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) { out_val = PrintLodTensorType(tensor, start, end); - } else if (tensor->type() == proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::INT64) { out_val = PrintLodTensorIntType(tensor, start, end); - } else if (tensor->type() == proto::VarType::FP64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + proto::VarType::FP64) { out_val = PrintLodTensorType(tensor, start, end); } else { out_val = "unsupported type"; diff --git a/paddle/fluid/framework/dist_multi_trainer.cc b/paddle/fluid/framework/dist_multi_trainer.cc index 4c8681aad2bb1..c0a9475f6e6d6 100644 --- a/paddle/fluid/framework/dist_multi_trainer.cc +++ b/paddle/fluid/framework/dist_multi_trainer.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/trainer.h" @@ -153,12 +154,13 @@ void DistMultiTrainer::Finalize() { } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 2ceeed694af47..24f1591ff33c9 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
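// Illustrative sketch, not part of the patch: the _ForEachDataType_-style
// dispatch macros above (MergeCallback, PrintLoDTensorCallback, ...) are still
// instantiated once per proto::VarType value, so the comparison inside each
// macro body converts the pten dtype first. APPLY_IF_DTYPE and Apply<T>() are
// hypothetical names; only the conversion call mirrors the patched code.
#define APPLY_IF_DTYPE(cpp_type, proto_type)                              \
  do {                                                                    \
    if (paddle::framework::TransToProtoVarType(tensor.dtype()) ==         \
        proto_type) {                                                     \
      Apply<cpp_type>(tensor);                                            \
    }                                                                     \
  } while (0)
// _ForEachDataType_(APPLY_IF_DTYPE);  // expands the check for every dtype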
#include "paddle/fluid/framework/dlpack_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" namespace paddle { @@ -134,7 +135,8 @@ DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) { t_.device = paddle::platform::VisitPlace(place, internal::DLDeviceVisitor()); // init dtype - t_.dtype = internal::GetDLDataTypeFromTypeIndex(tensor.type()); + t_.dtype = internal::GetDLDataTypeFromTypeIndex( + framework::TransToProtoVarType(tensor.dtype())); t_.dtype.lanes = lanes; // init ndim, tensor rank diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 668a28f6008ef..e45c630002dd6 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "google/protobuf/text_format.h" #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/lod_rank_table.h" @@ -235,16 +236,16 @@ void print_lod_tensor(std::string var_name, const LoDTensor& lod_tensor) { static void print_fetch_var(Scope* scope, const std::string& var_name) { auto& tensor = scope->FindVar(var_name)->Get(); -#define PrintLoDTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor.type() == proto_type) { \ - print_lod_tensor(var_name, tensor); \ - return; \ - } \ +#define PrintLoDTensorCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \ + print_lod_tensor(var_name, tensor); \ + return; \ + } \ } while (0) _ForEachDataType_(PrintLoDTensorCallback); - VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.type(); + VLOG(1) << "print_fetch_var: unrecognized data type:" << tensor.dtype(); } void ExecutorThreadWorker::TrainFilesWithTimer() { diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.h b/paddle/fluid/framework/fleet/ascend_wrapper.h index 4127adf1bfe27..d55862120116d 100644 --- a/paddle/fluid/framework/fleet/ascend_wrapper.h +++ b/paddle/fluid/framework/fleet/ascend_wrapper.h @@ -22,6 +22,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -146,13 +147,16 @@ class AscendInstance { // } ge::Shape shape(vec_dim); - ge::TensorDesc tensor_desc(shape, ge::Format::FORMAT_ND, - VarTypeToGeType(tensor->type())); + ge::TensorDesc tensor_desc( + shape, ge::Format::FORMAT_ND, + VarTypeToGeType(framework::TransToProtoVarType(tensor->dtype()))); tensor_desc.SetRealDimCnt(vec_dim.size()); const uint8_t *data = reinterpret_cast(tensor->data()); - std::vector dst(numel * GeTypeSize(tensor->type())); - memcpy(dst.data(), data, GeTypeSize(tensor->type()) * numel); + std::vector dst( + numel * GeTypeSize(framework::TransToProtoVarType(tensor->dtype()))); + memcpy(dst.data(), data, + GeTypeSize(framework::TransToProtoVarType(tensor->dtype())) * numel); ge::Tensor ge_tensor(tensor_desc, dst); return ge_tensor; } diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index 88c9a363d2789..11df5082952e5 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -28,6 +28,7 @@ limitations under the License. */ #include "paddle/fluid/framework/fleet/heter_wrapper.h" #if defined(PADDLE_WITH_PSLIB) && !defined(PADDLE_WITH_HETERPS) +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker.h" namespace paddle { @@ -90,7 +91,8 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, LoDTensor* tensor = var->GetMutable(); req_var->set_varname(varname); req_var->set_type(LOD_TENSOR); - req_var->set_data_type(static_cast(tensor->type())); + req_var->set_data_type(static_cast( + framework::TransToProtoVarType(tensor->dtype()))); for (auto& dim : framework::vectorize(tensor->dims())) { req_var->add_dims(dim); @@ -108,21 +110,27 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, auto* req_data = req_var->mutable_data(); req_data->clear(); - req_data->resize(tensor->numel() * SizeOfType(tensor->type())); + req_data->resize(tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); char* data_ptr = const_cast(req_data->data()); if (platform::is_cpu_place(tensor->place())) { memcpy(data_ptr, tensor->data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); } else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), - tensor->data(), tensor->numel() * SizeOfType(tensor->type()), - nullptr); + memory::Copy( + platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(), + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype())), + nullptr); #endif #ifdef PADDLE_WITH_XPU - memory::Copy(platform::CPUPlace(), data_ptr, tensor->place(), - tensor->data(), tensor->numel() * SizeOfType(tensor->type())); + memory::Copy( + platform::CPUPlace(), data_ptr, tensor->place(), tensor->data(), + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } } @@ -152,15 +160,18 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, ToVarType(req_var.data_type())); + void* tensor_data = tensor->mutable_data( + place, 
framework::TransToPtenDataType(ToVarType(req_var.data_type()))); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type()), stream); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype())), + stream); #else memcpy(tensor_data, req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } #endif @@ -190,15 +201,17 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, } tensor->set_lod(lod); - void* tensor_data = - tensor->mutable_data(place, ToVarType(req_var.data_type())); + void* tensor_data = tensor->mutable_data( + place, framework::TransToPtenDataType(ToVarType(req_var.data_type()))); #ifdef PADDLE_WITH_XPU memory::Copy(place, tensor_data, platform::CPUPlace(), req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #else memcpy(tensor_data, req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type())); + tensor->numel() * + SizeOfType(framework::TransToProtoVarType(tensor->dtype()))); #endif } diff --git a/paddle/fluid/framework/heter_section_worker.cc b/paddle/fluid/framework/heter_section_worker.cc index 8e94bb1d0e149..6f74d088764d4 100644 --- a/paddle/fluid/framework/heter_section_worker.cc +++ b/paddle/fluid/framework/heter_section_worker.cc @@ -11,7 +11,9 @@ limitations under the License. */ #if defined(PADDLE_WITH_PSCORE) #include + #include "paddle/fluid/distributed/ps/service/heter_server.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/platform/cpu_helper.h" @@ -35,21 +37,23 @@ void SetMicroId(paddle::framework::Scope* scope, auto* tensor = var->GetMutable(); std::vector dims{1}; tensor->Resize(framework::make_ddim(dims)); - void* tensor_data = - tensor->mutable_data(place, framework::proto::VarType::FP32); + void* tensor_data = tensor->mutable_data( + place, framework::TransToPtenDataType(framework::proto::VarType::FP32)); if (platform::is_gpu_place(place)) { #ifdef PADDLE_WITH_CUDA std::vector temp; - temp.resize(tensor->numel() * framework::SizeOfType(tensor->type())); + temp.resize(tensor->numel() * framework::DataTypeSize(tensor->dtype())); char* temp_ptr = temp.data(); float* temp_ptr_float = reinterpret_cast(temp_ptr); temp_ptr_float[0] = micro_id; auto stream = reinterpret_cast(*dev_ctx).stream(); - memory::Copy(place, tensor_data, platform::CPUPlace(), - reinterpret_cast(temp_ptr), - tensor->numel() * framework::SizeOfType(tensor->type()), - stream); + memory::Copy( + place, tensor_data, platform::CPUPlace(), + reinterpret_cast(temp_ptr), + tensor->numel() * framework::SizeOfType( + framework::TransToProtoVarType(tensor->dtype())), + stream); #endif } else { float* temp_ptr = reinterpret_cast(tensor_data); diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index e47681820e282..a4af56419a766 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include #include "io/fs.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/device_worker_factory.h" @@ -136,18 +137,18 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { InitializeVariable(ptr, proto::VarType::LOD_TENSOR); LoDTensor* thread_tensor = ptr->GetMutable(); -#define HeterMemcpyFunc(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - HeterMemCpy(thread_tensor, root_tensor, place, stream); \ - } \ +#define HeterMemcpyFunc(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + HeterMemCpy(thread_tensor, root_tensor, place, stream); \ + } \ } while (0) -#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - HeterMemCpy(thread_tensor, root_tensor, place); \ - } \ +#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + HeterMemCpy(thread_tensor, root_tensor, place); \ + } \ } while (0) #ifdef PADDLE_WITH_CUDA _ForEachDataType_(HeterMemcpyFunc); @@ -318,12 +319,13 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, // } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ @@ -334,8 +336,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #ifdef PADDLE_WITH_CUDA auto dev_id = thread_tensor->place().device; platform::CUDADeviceGuard guard(dev_id); - cudaMemset(thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + cudaMemset( + thread_tensor->data(), 0, + thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + thread_tensor->dtype()))); #endif #ifdef PADDLE_WITH_XPU auto place = thread_tensor->place(); @@ -346,12 +350,16 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, platform::DeviceContext* dev_ctx = pool.Get(place); const platform::XPUDeviceContext* xpu_ctx = reinterpret_cast(dev_ctx); - xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + xpu::memset( + xpu_ctx->x_context(), thread_tensor->data(), 0, + thread_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + thread_tensor->dtype()))); #endif } else { memset(thread_tensor->data(), 0, - thread_tensor->numel() * SizeOfType(thread_tensor->type())); + thread_tensor->numel() * + SizeOfType( + framework::TransToProtoVarType(thread_tensor->dtype()))); } } auto* merge_var = response->add_vars(); @@ -361,8 +369,10 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #ifdef PADDLE_WITH_CUDA auto dev_id = root_tensor->place().device; platform::CUDADeviceGuard guard(dev_id); - cudaMemset(root_tensor->data(), 0, - root_tensor->numel() * 
SizeOfType(root_tensor->type())); + cudaMemset( + root_tensor->data(), 0, + root_tensor->numel() * + SizeOfType(framework::TransToProtoVarType(root_tensor->dtype()))); #endif #ifdef PADDLE_WITH_XPU auto place = root_tensor->place(); @@ -373,12 +383,15 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, platform::DeviceContext* dev_ctx = pool.Get(place); const platform::XPUDeviceContext* xpu_ctx = reinterpret_cast(dev_ctx); - xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0, - root_tensor->numel() * SizeOfType(root_tensor->type())); + xpu::memset( + xpu_ctx->x_context(), root_tensor->data(), 0, + root_tensor->numel() * + SizeOfType(framework::TransToProtoVarType(root_tensor->dtype()))); #endif } else { memset(root_tensor->data(), 0, - root_tensor->numel() * SizeOfType(root_tensor->type())); + root_tensor->numel() * SizeOfType(framework::TransToProtoVarType( + root_tensor->dtype()))); } } return 0; diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc index 0b4c8f4a719af..cb33e87f490c2 100644 --- a/paddle/fluid/framework/hogwild_worker.cc +++ b/paddle/fluid/framework/hogwild_worker.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" @@ -79,11 +81,11 @@ void HogwildWorker::CreateThreadScope(const ProgramDesc &program) { LoDTensor *thread_tensor = ptr1->GetMutable(); LoDTensor *root_tensor = root_scope_->FindVar(var->Name())->GetMutable(); -#define MemsetCallback(cpp_type, proto_type) \ - do { \ - if (root_tensor->type() == proto_type) { \ - SetZero(thread_tensor, root_tensor, tensor_dim); \ - } \ +#define MemsetCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + SetZero(thread_tensor, root_tensor, tensor_dim); \ + } \ } while (0) _ForEachDataType_(MemsetCallback); } diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index efe2423b0e8a5..9e1958973d2d9 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/platform/enforce.h" @@ -134,7 +135,7 @@ class CompatMetaTensor : public pten::MetaTensor { } } else { auto* var = BOOST_GET_CONST(VarDesc*, var_); - return pten::TransToPtenDataType(var->GetDataType()); + return paddle::framework::TransToPtenDataType(var->GetDataType()); } } @@ -183,7 +184,7 @@ class CompatMetaTensor : public pten::MetaTensor { } } else { auto* var = BOOST_GET(VarDesc*, var_); - var->SetDataType(pten::TransToProtoVarType(dtype)); + var->SetDataType(paddle::framework::TransToProtoVarType(dtype)); } } diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index 30a1de15cb052..695c6bcdd169d 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_version_registry.h" namespace pten { @@ -216,7 +217,8 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const { patterns::PDNodeName(name_scope_, "eltwise_y_in")); // Set shape && datatype manually eltwise_y_in_desc.SetShape(framework::vectorize(ac_bias_tensor->dims())); - eltwise_y_in_desc.SetDataType(ac_bias_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(ac_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 194686825ff2d..927a1bfd5c972 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" @@ -285,7 +286,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const { VarDesc eltwise_y_in_desc( patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in")); eltwise_y_in_desc.SetShape(framework::vectorize(bn_bias_tensor->dims())); - eltwise_y_in_desc.SetDataType(bn_bias_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(bn_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); @@ -531,7 +533,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const { name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count))); eltwise_y_in_desc.SetShape( framework::vectorize(eltwise_y_in_tensor->dims())); - eltwise_y_in_desc.SetDataType(eltwise_y_in_tensor->type()); + eltwise_y_in_desc.SetDataType( + framework::TransToProtoVarType(eltwise_y_in_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel()); eltwise_y_in_desc.SetPersistable(true); auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc); diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc index 9babfd1c982df..c9cefea836054 100644 --- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc @@ -14,6 +14,7 
@@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -273,10 +274,11 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { "but has %s elements.", eps_tensor->numel())); CHECK_TRUE( - eps_tensor->type() == proto::VarType::FP32, + framework::TransToProtoVarType(eps_tensor->dtype()) == + proto::VarType::FP32, ::paddle::string::Sprintf("The LayerNorm divisor epsilon value " "must be of FP32 data type, but is %s.", - eps_tensor->type())); + eps_tensor->dtype())); CHECK_TRUE(validateReduceOpAttrs(x_mean, x_shape, "input mean"), "Validation of input mean node failed."); @@ -333,7 +335,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable(); VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale")); new_gamma_desc.SetShape({layer_norm_x_mat_dims[1]}); - new_gamma_desc.SetDataType(gamma_tensor->type()); + new_gamma_desc.SetDataType( + framework::TransToProtoVarType(gamma_tensor->dtype())); new_gamma_desc.SetLoDLevel(gamma->Var()->GetLoDLevel()); new_gamma_desc.SetPersistable(true); auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc); @@ -347,7 +350,8 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* beta_tensor = scope->FindVar(beta->Name())->GetMutable(); VarDesc new_beta_desc(patterns::PDNodeName("layer_norm_fuse", "Bias")); new_beta_desc.SetShape({layer_norm_x_mat_dims[1]}); - new_beta_desc.SetDataType(beta_tensor->type()); + new_beta_desc.SetDataType( + framework::TransToProtoVarType(beta_tensor->dtype())); new_beta_desc.SetLoDLevel(beta->Var()->GetLoDLevel()); new_beta_desc.SetPersistable(true); auto* new_beta_node = g->CreateVarNode(&new_beta_desc); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index e41c35ba33fdc..dafcc9c4e16a3 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -95,7 +95,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void MainTest(bool convWithExistingBias) { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 7bcb127450a35..3a78c229bd8fa 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -125,7 +125,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void PreparePass(std::unique_ptr* graph, const ProgramDesc& prog, diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc index f89893c050b8c..4077700b28f2e 100644 --- 
a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass_tester.cc @@ -438,7 +438,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, const char* var_name) { auto x = scope->Var(var_name); auto tensor = x->GetMutable(); - tensor->mutable_data(place, proto::VarType::FP32, 1); + tensor->mutable_data(place, + framework::TransToPtenDataType(proto::VarType::FP32), 1); } void PrepareGraph(std::unique_ptr* graph, const ProgramDesc& prog) { diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index ab2e30a15ea15..a7f186ff0030f 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/version.h" namespace paddle { @@ -327,7 +328,7 @@ std::vector SplitLoDTensor( for (size_t i = 0; i < places.size(); ++i) { LoDTensor dst; dst.Resize(src.dims()); - dst.mutable_data(places[i], src.type()); + dst.mutable_data(places[i], src.dtype()); if (!src.lod().empty()) { dst.set_lod(src.lod()); } @@ -393,7 +394,7 @@ void MergeLoDTensor(LoDTensor *target, for (auto *t : lod_tensors) { if (t->numel() && t->IsInitialized()) { new_dim = t->dims(); - new_type = t->type(); + new_type = framework::TransToProtoVarType(t->dtype()); new_layout = t->layout(); break; } @@ -405,11 +406,12 @@ void MergeLoDTensor(LoDTensor *target, auto *t = lod_tensors[i]; if (t->numel() && t->IsInitialized()) { PADDLE_ENFORCE_EQ( - new_type, t->type(), + new_type, framework::TransToProtoVarType(t->dtype()), platform::errors::InvalidArgument( "LoDTensor data type does not match, expected type is %s, actual " "type is %s.", - DataTypeToString(new_type), DataTypeToString(t->type()))); + DataTypeToString(new_type), + DataTypeToString(framework::TransToProtoVarType(t->dtype())))); PADDLE_ENFORCE_EQ( new_layout, t->layout(), platform::errors::InvalidArgument( @@ -444,7 +446,8 @@ void MergeLoDTensor(LoDTensor *target, target->Resize(new_dim); target->set_layout(new_layout); target->set_lod(new_lod); - target->mutable_data(dst_place, new_type); + target->mutable_data(dst_place, + paddle::framework::TransToPtenDataType(new_type)); int begin = 0; for (auto *src : lod_tensors) { diff --git a/paddle/fluid/framework/multi_trainer.cc b/paddle/fluid/framework/multi_trainer.cc index 32d3cdef4512b..0eb2ef4837bbe 100644 --- a/paddle/fluid/framework/multi_trainer.cc +++ b/paddle/fluid/framework/multi_trainer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/trainer.h" #include "paddle/fluid/platform/lodtensor_printer.h" @@ -250,12 +251,13 @@ void MultiTrainer::Finalize() { LoDTensor* thread_tensor = thread_var->GetMutable(); #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc index 3fe9e877658da..328bfdbf310ea 100644 --- a/paddle/fluid/framework/new_executor/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/data_transfer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/data_transfer.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace framework { @@ -366,7 +367,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, continue; } // only focus on complex dtype now - auto src_type = grad_tensor->type(); + auto src_type = framework::TransToProtoVarType(grad_tensor->dtype()); if (!framework::IsComplexType(src_type)) { VLOG(3) << "skip grad_tensor with not complexType"; continue; @@ -390,7 +391,7 @@ void HandleComplexGradToRealGrad(const OpFuncNode& op_func_node, platform::errors::Unavailable( "Forward tensor is nullptr when handle complex data to real.")); // only need record type, the allocation may have been released - auto dst_type = tensor->saved_type(); + auto dst_type = framework::TransToProtoVarType(tensor->dtype()); // only focus on real dtype and need casting if (framework::IsComplexType(dst_type)) { continue; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 6af5a70d66085..4670f043102d9 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" @@ -109,13 +110,13 @@ static std::string GetDtype(const ScopeBase& scope, const std::string& name) { if (UNLIKELY(!tensor.IsInitialized())) { return ""; } - return DataTypeToString(tensor.type()); + return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); } else if (var->IsType()) { auto tensor = var->Get().value(); if (UNLIKELY(!tensor.IsInitialized())) { return "uninited"; } else { - return DataTypeToString(tensor.type()); + return DataTypeToString(framework::TransToProtoVarType(tensor.dtype())); } } else if (var->IsType()) { return "strings"; @@ -1070,8 +1071,8 @@ static void CheckTensorNANOrInf(const std::string& op_type, if (tensor.memory_size() == 0) { return; } - if (tensor.type() != proto::VarType::FP32 && - tensor.type() != proto::VarType::FP64) { + if (framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP32 && + framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) { return; } PADDLE_ENFORCE_NE( @@ -1536,7 +1537,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( continue; } // only focus on complex dtype now - auto src_type = grad_tensor->type(); + auto src_type = framework::TransToProtoVarType(grad_tensor->dtype()); if (!IsComplexType(src_type)) { continue; } @@ -1556,7 +1557,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( platform::errors::Unavailable( "Forward tensor is nullptr when handle complex data to real.")); // only need record type, the allocation may have been released - auto dst_type = tensor->saved_type(); + auto dst_type = framework::TransToProtoVarType(tensor->dtype()); // only focus on real dtype and need casting if (IsComplexType(dst_type)) { continue; @@ -1770,7 +1771,8 @@ void OperatorWithKernel::ParseInputDataType( platform::errors::InvalidArgument("The %s Op's Input Variable `%s` " "contains uninitialized Tensor.", Type(), name)); - proto::VarType::Type tmp = t->type(); + proto::VarType::Type tmp = + paddle::framework::TransToProtoVarType(t->dtype()); PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type, platform::errors::InvalidArgument( "The DataType of %s Op's duplicable or different " @@ -1869,8 +1871,8 @@ proto::VarType::Type OperatorWithKernel::IndicateOrPromoteVarDataTypes( auto* tensor_b = GetTensorFormInputSafely(ctx, name2); // 2. Get two input types - auto type_a = tensor_a->type(); - auto type_b = tensor_b->type(); + auto type_a = framework::TransToProtoVarType(tensor_a->dtype()); + auto type_b = framework::TransToProtoVarType(tensor_b->dtype()); // 3. 
Get first input type or promote complex types auto target_type = PromoteTypesIfComplexExists(type_a, type_b); @@ -2168,7 +2170,7 @@ void OperatorWithKernel::BuildPtenKernelContext( pt_kernel_context->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(pten::DataType))) { - auto data_type = pten::TransToPtenDataType( + auto data_type = paddle::framework::TransToPtenDataType( static_cast( BOOST_GET_CONST(int, attr))); pt_kernel_context->EmplaceBackAttr(data_type); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index db529bd17f4ab..1babd82f3c26a 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -40,6 +40,7 @@ limitations under the License. */ #include "paddle/fluid/platform/variant.h" #include "paddle/utils/flat_hash_map.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/core/compat/arg_map_context.h" #include "paddle/pten/core/compat/op_utils.h" #include "paddle/pten/core/kernel_context.h" @@ -422,8 +423,8 @@ class ExecutionContext { "size(%d).", allocation_ptr->size(), framework::product(dim) * sizeof(T))); - paddle::framework::Tensor temp_tensor( - framework::ToDataType(std::type_index(typeid(T)))); + paddle::framework::Tensor temp_tensor(framework::TransToPtenDataType( + framework::ToDataType(std::type_index(typeid(T))))); temp_tensor.Resize(dim); temp_tensor.ResetHolder(std::move(shared_allocation)); return temp_tensor; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 43d62e8d8df3b..31bf8d9b726d8 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -27,6 +27,7 @@ limitations under the License. */ #include "cinn/frontend/op_mappers/use_op_mappers.h" #include "cinn/frontend/var_type_utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" @@ -57,7 +58,7 @@ OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor( // op auto tensor_type = ::paddle::framework::proto::VarType::FP32; if (!skip_trans_type) { - tensor_type = tensor.type(); + tensor_type = framework::TransToProtoVarType(tensor.dtype()); } auto cinn_var_type = TransformVarDataTypeToCinn(tensor_type); info.type = ::cinn::frontend::utils::CppVarType2CommonType(cinn_var_type); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc index 09df9a7ad2ce4..09bca4a735461 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" namespace paddle { @@ -206,7 +207,9 @@ class CinnGraphSymbolizationTest : public ::testing::Test { LoDTensor tensor; DDim dims = {256, 1024}; tensor.Resize(dims); - tensor.mutable_data(platform::CPUPlace(), proto::VarType::FP32); + tensor.mutable_data( + platform::CPUPlace(), + framework::TransToPtenDataType(framework::proto::VarType::FP32)); return tensor; }; #define FillFeedList(Name) feed_targets[#Name] = create_tensor(); diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7eebfb904cf66..aed5e2c7405ac 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" @@ -775,7 +776,8 @@ void ParallelExecutor::BCastParamsToDevices( std::vector buffers; buffers.reserve(member_->places_.size()); size_t numel = main_tensor.numel(); - ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); + ncclDataType_t data_type = platform::ToNCCLDataType( + framework::TransToProtoVarType(main_tensor.dtype())); for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; void *buffer; @@ -786,7 +788,7 @@ void ParallelExecutor::BCastParamsToDevices( auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var)->GetMutable(); t->Resize(dims); - buffer = t->mutable_data(place, main_tensor.type()); + buffer = t->mutable_data(place, main_tensor.dtype()); } buffers.push_back(buffer); } @@ -818,7 +820,8 @@ void ParallelExecutor::BCastParamsToDevices( // but broadcast is equivalent to no type of operation, does not affect // correctness. 
BKCLDataType data_type = BKCL_FLOAT; - // BKCLDataType data_type = platform::ToBKCLDataType(main_tensor.type()); + // BKCLDataType data_type = + // platform::ToBKCLDataType(framework::TransToProtoVarType(main_tensor.dtype())); for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; void *buffer; @@ -829,7 +832,7 @@ void ParallelExecutor::BCastParamsToDevices( auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var)->GetMutable(); t->Resize(dims); - buffer = t->mutable_data(place, main_tensor.type()); + buffer = t->mutable_data(place, main_tensor.dtype()); } buffers.push_back(buffer); } @@ -848,7 +851,8 @@ void ParallelExecutor::BCastParamsToDevices( for (size_t i = 0; i < member_->places_.size(); ++i) { auto &bkcl_ctx = bkcl_ctxs->at(member_->places_[i]); auto broadcast_numel = numel; - if (main_tensor.type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(main_tensor.dtype()) == + framework::proto::VarType::INT64) { broadcast_numel *= 2; } PADDLE_ENFORCE_EQ( @@ -873,7 +877,7 @@ void ParallelExecutor::BCastParamsToDevices( auto copy_memory = [&] { t->Resize(dims); - t->mutable_data(cpu, main_tensor.type()); + t->mutable_data(cpu, main_tensor.dtype()); paddle::framework::TensorCopy(main_tensor, cpu, t); }; diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index 8f0efdf42f1ee..4d34ba85517e1 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "io/fs.h" #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_set.h" @@ -232,12 +233,13 @@ void PSGPUTrainer::Finalize() { } #define MergeCallback(cpp_type, proto_type) \ do { \ - if (root_tensor->type() == proto_type) { \ - if (thread_tensor->type() != proto_type) { \ + if (framework::TransToProtoVarType(root_tensor->dtype()) == proto_type) { \ + if (framework::TransToProtoVarType(thread_tensor->dtype()) != \ + proto_type) { \ VLOG(0) << "Error: thread id=" << j << ", need_merge_var_names_[" << i \ << "] " << need_merge_var_names_[i] \ - << ", root tensor type=" << root_tensor->type() \ - << ", thread tensor type=" << thread_tensor->type(); \ + << ", root tensor type=" << root_tensor->dtype() \ + << ", thread tensor type=" << thread_tensor->dtype(); \ exit(-1); \ } \ MergeToRootScope(root_tensor, thread_tensor); \ diff --git a/paddle/fluid/framework/pten_utils.cc b/paddle/fluid/framework/pten_utils.cc index ccf7752f2c2fe..ea2e62d89f63d 100644 --- a/paddle/fluid/framework/pten_utils.cc +++ b/paddle/fluid/framework/pten_utils.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -59,7 +60,7 @@ class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker { OpKernelType TransPtenKernelKeyToOpKernelType( const pten::KernelKey& kernel_key) { proto::VarType::Type data_type = - pten::TransToProtoVarType(kernel_key.dtype()); + paddle::framework::TransToProtoVarType(kernel_key.dtype()); // no need to set current device id here platform::Place place = pten::TransToPtenPlace(kernel_key.backend(), false); DataLayout data_layout = kernel_key.layout(); @@ -87,7 +88,7 @@ pten::KernelKey TransOpKernelTypeToPtenKernelKey( } paddle::experimental::DataLayout layout = kernel_type.data_layout_; paddle::experimental::DataType dtype = - pten::TransToPtenDataType(kernel_type.data_type_); + paddle::framework::TransToPtenDataType(kernel_type.data_type_); return pten::KernelKey(backend, layout, dtype); } diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc index 0f1a8e2a9ed5f..86574c5ca2be3 100644 --- a/paddle/fluid/framework/save_load_util.cc +++ b/paddle/fluid/framework/save_load_util.cc @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/layer.h" namespace paddle { @@ -282,7 +283,7 @@ bool SaveTensorToDisk(const std::string& file_name, auto tensor = itera.second; proto::VarType::TensorDesc desc; - desc.set_data_type(tensor->type()); + desc.set_data_type(framework::TransToProtoVarType(tensor->dtype())); auto dims = framework::vectorize(tensor->dims()); auto* pb_dims = desc.mutable_dims(); pb_dims->Resize(static_cast(dims.size()), 0); @@ -294,7 +295,7 @@ bool SaveTensorToDisk(const std::string& file_name, // save tensor uint64_t data_size = - tensor->numel() * framework::SizeOfType(tensor->type()); + tensor->numel() * framework::DataTypeSize(tensor->dtype()); auto* data_ptr = tensor->data(); if (platform::is_gpu_place(tensor->place())) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index b926a3cc7659b..844b5d8269500 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/complex.h" @@ -55,10 +56,10 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, // than numel()*size(type()) auto dst_ptr = src.layout() == DataLayout::kMKLDNN - ? dst->mutable_data(dst_place, src.type(), src.memory_size()) - : dst->mutable_data(dst_place, src.type()); + ? dst->mutable_data(dst_place, src.dtype(), src.memory_size()) + : dst->mutable_data(dst_place, src.dtype()); #else - auto dst_ptr = dst->mutable_data(dst_place, src.type()); + auto dst_ptr = dst->mutable_data(dst_place, src.dtype()); #endif if (src_ptr == dst_ptr && src_place == dst_place) { VLOG(3) << "Skip copy the same data async from " << src_place << " to " @@ -70,9 +71,9 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, #ifdef PADDLE_WITH_MKLDNN auto size = src.layout() == DataLayout::kMKLDNN ? 
src.memory_size() - : src.numel() * SizeOfType(src.type()); + : src.numel() * framework::DataTypeSize(src.dtype()); #else - auto size = src.numel() * SizeOfType(src.type()); + auto size = src.numel() * framework::DataTypeSize(src.dtype()); #endif if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { @@ -126,7 +127,7 @@ void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(src.dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, src.type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor @@ -410,7 +411,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, #endif auto src_place = src.place(); auto src_ptr = src.data(); - auto dst_ptr = dst->mutable_data(dst_place, src.type()); + auto dst_ptr = dst->mutable_data(dst_place, src.dtype()); VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; if (src_ptr == dst_ptr && src_place == dst_place) { @@ -419,7 +420,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, return; } - auto size = src.numel() * SizeOfType(src.type()); + auto size = src.numel() * framework::DataTypeSize(src.dtype()); if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); } @@ -582,8 +583,9 @@ struct AnyDTypeVisitor { template inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, const DevCtx& ctx, framework::Tensor* out) { - VisitDataType(tensor.type(), AnyDTypeVisitor( - predicate, tensor, ctx, out)); + VisitDataType( + framework::TransToProtoVarType(tensor.dtype()), + AnyDTypeVisitor(predicate, tensor, ctx, out)); } template @@ -722,8 +724,9 @@ struct AllDTypeVisitor { template inline void AllImpl(Predicate predicate, const framework::Tensor& tensor, const DevCtx& ctx, framework::Tensor* out) { - VisitDataType(tensor.type(), AllDTypeVisitor( - predicate, tensor, ctx, out)); + VisitDataType( + framework::TransToProtoVarType(tensor.dtype()), + AllDTypeVisitor(predicate, tensor, ctx, out)); } template @@ -930,7 +933,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, // int32_t size // void* protobuf message proto::VarType::TensorDesc desc; - desc.set_data_type(tensor.type()); + desc.set_data_type(framework::TransToProtoVarType(tensor.dtype())); auto dims = framework::vectorize(tensor.dims()); auto* pb_dims = desc.mutable_dims(); pb_dims->Resize(static_cast(dims.size()), 0); @@ -941,7 +944,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, os.write(out.data(), size); } { // the 3rd field, tensor data - uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type()); + uint64_t size = tensor.numel() * framework::DataTypeSize(tensor.dtype()); auto* data_ptr = tensor.data(); PADDLE_ENFORCE_LT(size, (std::numeric_limits::max)(), @@ -1419,13 +1422,14 @@ std::ostream& operator<<(std::ostream& os, const pten::DenseTensor& t) { dev_ctx.Wait(); } -#define PrintTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor.type() == proto_type) { \ - os << " - dtype: " << proto_type << "\n"; \ - paddle::framework::print_tensor(os, tensor); \ - return os; \ - } \ +#define PrintTensorCallback(cpp_type, proto_type) \ + do { \ + if (paddle::framework::TransToProtoVarType(tensor.dtype()) == \ + proto_type) { \ + os << " - dtype: " << proto_type << "\n"; \ 
+ paddle::framework::print_tensor(os, tensor); \ + return os; \ + } \ } while (0) _ForEachDataType_(PrintTensorCallback); diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index f0c41e6dc0fcf..bcaf3c719cb72 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -160,7 +160,7 @@ void TensorFromArray(const T* src, const size_t& array_size, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor @@ -211,7 +211,7 @@ void TensorFromVector(const std::vector& src, // so pass nullptr as stream to memory::Copy(). else if (platform::is_npu_place(dst_place)) { // NOLINT // 1. vector -> npu pinned tensor - Tensor npu_pinned_tensor(dst->type()); + Tensor npu_pinned_tensor(dst->dtype()); platform::NPUPinnedPlace npu_pinned_place; auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(dst->dims(), npu_pinned_place); @@ -280,7 +280,7 @@ inline void TensorFromVector(const std::vector& src, Tensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = - npu_pinned_tensor.mutable_data(npu_pinned_place, dst->type()); + npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); memory::Copy(npu_pinned_place, npu_pinned_ptr, src_place, src_ptr, size); // 2. async copy npu pinned tensor -> npu tensor diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index 0f105ec9a3082..99adf9b92244a 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -15,6 +15,7 @@ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/imperative/all_reduce.h" +#include "paddle/fluid/framework/convert_utils.h" #ifdef PADDLE_WITH_NCCL #include @@ -62,8 +63,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, const void *src_ptr = src.data(); dst->Resize(src.dims()); - auto *dst_ptr = dst->mutable_data(src.place(), src.type()); - auto nccl_dtype = platform::ToNCCLDataType(src.type()); + auto *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + auto nccl_dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( src_ptr, dst_ptr, src.numel(), nccl_dtype, ncclSum, comm->comm(), stream)); @@ -82,7 +84,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, platform::errors::Unimplemented( "Imperative mode does not support multi-CPU training yet.")); - auto dtype = src_tensor.type(); + auto dtype = framework::TransToProtoVarType(src_tensor.dtype()); auto nccl_dtype = platform::ToNCCLDataType(dtype); auto *dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); @@ -127,7 +129,7 @@ static void AllReduce(const pten::SelectedRows &src, pten::SelectedRows *dst, dims[0] = rows_num; auto feature_size = framework::product(dims) / dims[0]; dst_tensor->Resize(dims); - auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); + auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype()); const auto *src_tensor_ptr = src_tensor.data(); auto sizeof_dtype = framework::SizeOfType(dtype); diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index 
4c91ece049301..973541e6dcc1b 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -24,6 +24,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" @@ -152,7 +153,8 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) { // correct. var->DataType() returns the default dtype, which is float32. // Here, we use the type of the corresponding forward datatype. - tensor->mutable_data(op.place(), var->ForwardDataType()); + tensor->mutable_data( + op.place(), framework::TransToPtenDataType(var->ForwardDataType())); VLOG(6) << "Set ungenerated Grad: " << var->Name() << " as zero with dtype " << framework::DataTypeToString(var->ForwardDataType()); diff --git a/paddle/fluid/imperative/bkcl_context.cc b/paddle/fluid/imperative/bkcl_context.cc index f08dd59e39206..11abbfe7cf6a3 100644 --- a/paddle/fluid/imperative/bkcl_context.cc +++ b/paddle/fluid/imperative/bkcl_context.cc @@ -13,13 +13,14 @@ // limitations under the License. #if defined(PADDLE_WITH_XPU_BKCL) -#include "paddle/fluid/imperative/bkcl_context.h" #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/imperative/bkcl_context.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -41,8 +42,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, const void *src_ptr = src.data(); dst->Resize(src.dims()); - auto *dst_ptr = dst->mutable_data(src.place(), src.type()); - auto bkcl_dtype = platform::ToBKCLDataType(src.type()); + auto *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + auto bkcl_dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(), bkcl_dtype, BKCL_ADD, stream), @@ -159,7 +161,8 @@ void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { XPUStream stream = comm->stream(); void *src_ptr = src_tensor->data(); - auto data_type = platform::ToBKCLDataType(src_tensor->type()); + auto data_type = platform::ToBKCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_EQ(bkcl_broadcast(comm->comm(), src_ptr, src_ptr, src_tensor->numel(), data_type, 0, stream), diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc index eeac500cac413..0c55cd9d5d08d 100644 --- a/paddle/fluid/imperative/gloo_context.cc +++ b/paddle/fluid/imperative/gloo_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
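// Illustrative sketch, not part of the patch: when only a legacy
// proto::VarType::Type is at hand (a VarBase's ForwardDataType(), a
// deserialized data_type field, or a hard-coded proto::VarType::FP32), it is
// converted the other way with TransToPtenDataType() before calling the new
// mutable_data(place, pten::DataType) overload, as basic_engine.cc does
// above. tensor and place are placeholders here.
void* data = tensor->mutable_data(
    place, paddle::framework::TransToPtenDataType(
               paddle::framework::proto::VarType::FP32));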
#include "paddle/fluid/imperative/gloo_context.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" @@ -109,7 +110,7 @@ void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor, framework::Tensor *dst_tensor) { auto gloo_wrapper = framework::GlooWrapper::GetInstance(); dst_tensor->Resize(src_tensor.dims()); - switch (src_tensor.type()) { + switch (framework::TransToProtoVarType(src_tensor.dtype())) { GLOO_CASE(framework::proto::VarType::FP32, float, gloo_wrapper); GLOO_CASE(framework::proto::VarType::FP64, double, gloo_wrapper); GLOO_CASE(framework::proto::VarType::INT32, int, gloo_wrapper); @@ -139,7 +140,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, VLOG(3) << "SelectedRows AllReduce start"; const auto &src_tensor = src.value(); const auto &place = src_tensor.place(); - auto dtype = src_tensor.type(); + auto dtype = framework::TransToProtoVarType(src_tensor.dtype()); // 1. Gather rows number from all workers. Here use ncclAllGather to do this, // but we can use other ways to implement is in the future const auto &src_rows = src.rows(); @@ -169,7 +170,7 @@ void GLOOParallelContext::AllReduce(const pten::SelectedRows &src, std::for_each(element_nums.begin(), element_nums.end(), [feature_size](size_t &x) { x = x * feature_size; }); - auto *dst_tensor_ptr = dst_tensor->mutable_data(place, dtype); + auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype()); gloo_wrapper->AllGatherVector(const_cast(src_rows_ptr), static_cast(dst_rows_ptr), rows_num_vector); diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 5eed7eca7a751..d57cb696526b4 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -18,6 +18,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/imperative/layer.h" @@ -263,10 +264,11 @@ void TensorAdd(const VarType& src, VarType* dst) { "%zu and the number of elements of destination tensor is %zu.", numel, dst_tensor->numel())); - auto data_type = src_tensor.type(); + auto data_type = framework::TransToProtoVarType(src_tensor.dtype()); auto place = src_tensor.place(); - PADDLE_ENFORCE_EQ(dst_tensor->type(), data_type, + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(dst_tensor->dtype()), + data_type, platform::errors::PreconditionNotMet( "The data type of source tensor and destination tensor " "should be equal, Otherwise, the calculation results " @@ -376,7 +378,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) { const pten::SelectedRows& src_selected_rows = GetInnerTensor(src); auto place = dst_tensor->place(); - auto data_type = src_selected_rows.value().type(); + auto data_type = + framework::TransToProtoVarType(src_selected_rows.value().dtype()); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); #define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \ @@ -422,13 +425,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var, const pten::DenseTensor& src_tensor = GetInnerTensor(src_tensor_var); const auto& place = src_tensor.place(); - auto data_type = src_tensor.type(); + auto data_type = 
framework::TransToProtoVarType(src_tensor.dtype()); auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); pten::DenseTensor* dst_tensor = GetInnerMutableTensor(dst_tensor_var); dst_tensor->Resize(src_tensor.dims()); - dst_tensor->mutable_data(place, data_type); + dst_tensor->mutable_data(place, src_tensor.dtype()); + #define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \ if (data_type == framework::DataTypeTrait::DataType()) { \ paddle::operators::math::SelectedRowsAddTensor \ @@ -477,7 +481,8 @@ std::shared_ptr SelectedRowsMerge( auto& src_selected_rows1 = src1.Get(); auto& src_selected_rows2 = src2.Get(); auto place = src_selected_rows1.value().place(); - auto data_type = src_selected_rows1.value().type(); + auto data_type = + framework::TransToProtoVarType(src_selected_rows1.value().dtype()); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); std::vector src_selected_rows; @@ -702,12 +707,14 @@ void EagerGradientAccumulator::SumGrad(std::shared_ptr var, VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " << var->Var().Get().dims(); tensor->Resize(var->Var().Get().dims()); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } else { auto* tensor = dst_var->MutableVar()->GetMutable(); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } } @@ -834,12 +841,14 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr var, VLOG(6) << "Dims of " << dst_var->Name() << " is set as: " << var->Var().Get().dims(); tensor->Resize(var->Var().Get().dims()); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } else { auto* tensor = dst_var->MutableVar()->GetMutable(); - tensor->mutable_data(place, var->DataType()); + tensor->mutable_data(place, + framework::TransToPtenDataType(var->DataType())); pten::funcs::set_constant(*dev_ctx, tensor, 0.0); } } diff --git a/paddle/fluid/imperative/hccl_context.cc b/paddle/fluid/imperative/hccl_context.cc index 7292c0f82fced..31d988753f23c 100644 --- a/paddle/fluid/imperative/hccl_context.cc +++ b/paddle/fluid/imperative/hccl_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
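The gradient_accumulator.cc checks above now compare proto types recovered from dtype() on both sides instead of calling type() on each tensor. A reduced sketch of that comparison; CheckSameDtype is a hypothetical helper and the error wording is illustrative only:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/enforce.h"

    // Both tensors are mapped back to proto::VarType::Type so the existing
    // PADDLE_ENFORCE_EQ comparison and message formatting keep working.
    void CheckSameDtype(const paddle::framework::Tensor& src,
                        const paddle::framework::Tensor& dst) {
      auto src_type = paddle::framework::TransToProtoVarType(src.dtype());
      auto dst_type = paddle::framework::TransToProtoVarType(dst.dtype());
      PADDLE_ENFORCE_EQ(dst_type, src_type,
                        paddle::platform::errors::PreconditionNotMet(
                            "The data type of source tensor and destination "
                            "tensor should be equal."));
    }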
#include "paddle/fluid/imperative/hccl_context.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" @@ -44,8 +45,9 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, void *src_ptr = const_cast(src.data()); dst->Resize(src.dims()); - void *dst_ptr = dst->mutable_data(src.place(), src.type()); - HcclDataType hccl_dtype = platform::ToHCCLDataType(src.type()); + void *dst_ptr = dst->mutable_data(src.place(), src.dtype()); + HcclDataType hccl_dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(src.dtype())); PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclAllReduce( src_ptr, dst_ptr, src.numel(), hccl_dtype, HCCL_REDUCE_SUM, comm->comm(), @@ -169,7 +171,8 @@ void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { void *src_ptr = reinterpret_cast(const_cast(src_tensor->data())); - auto hccl_dtype = platform::ToHCCLDataType(src_tensor->type()); + auto hccl_dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast( src_ptr, src_tensor->numel(), hccl_dtype, 0, comm->comm(), reinterpret_cast(stream))); diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index 65309b66db594..c3f04cba36d89 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/imperative/jit/program_desc_tracer.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace imperative { @@ -253,7 +254,7 @@ void ProgramDescTracer::InsertVarIfNotExist( new_var_desc->SetShape(framework::vectorize(tensor.dims())); new_var_desc->SetLoDLevel(tensor.lod().size()); if (tensor.IsInitialized()) { - new_var_desc->SetDataType(tensor.type()); + new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype())); } else { new_var_desc->SetDataType(framework::proto::VarType::FP32); } diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 60e1291a08700..b8c423f77bd23 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -15,6 +15,8 @@ #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/infer_var_type_context.h" #include "paddle/fluid/imperative/op_base.h" @@ -99,7 +101,9 @@ static std::string DebugString( auto& tensor = var.Get(); ss << "LoDTensor<"; if (tensor.IsInitialized()) { - ss << framework::DataTypeToString(tensor.type()) << ", "; + ss << framework::DataTypeToString( + framework::TransToProtoVarType(tensor.dtype())) + << ", "; ss << tensor.place() << ", "; ss << "(" << tensor.dims() << ")"; } else { @@ -112,7 +116,9 @@ static std::string DebugString( auto& tensor = selected_rows.value(); auto& rows = selected_rows.rows(); if (tensor.IsInitialized()) { - ss << framework::DataTypeToString(tensor.type()) << ", "; + ss << framework::DataTypeToString( + framework::TransToProtoVarType(tensor.dtype())) + << ", "; ss << tensor.place() << ", "; ss << "height(" << selected_rows.height() << "), rows("; std::for_each(rows.cbegin(), rows.cend(), diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index 
066d0db134817..e9d987cc7045f 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -25,6 +25,7 @@ #include "paddle/fluid/platform/dynload/nccl.h" #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" @@ -143,7 +144,8 @@ void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { gpuStream_t stream = comm->stream(); void *src_ptr = src_tensor->data(); - auto nccl_dtype = platform::ToNCCLDataType(src_tensor->type()); + auto nccl_dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(src_tensor->dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( src_ptr, src_tensor->numel(), nccl_dtype, 0, comm->comm(), stream)); } diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc index ed60a4dc0849b..fcadd0046fe2c 100644 --- a/paddle/fluid/imperative/partial_grad_engine.cc +++ b/paddle/fluid/imperative/partial_grad_engine.cc @@ -24,6 +24,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/op_base.h" @@ -312,9 +313,11 @@ static void FillConstantLike(const VariableWrapper &ref_var, // we can't get data_type_ directly. We need to check if we can only use // default data_type for now. if (ref_var.ForwardDataType() != -1) { - dst_tensor->mutable_data(place, ref_var.ForwardDataType()); + dst_tensor->mutable_data( + place, framework::TransToPtenDataType(ref_var.ForwardDataType())); } else { - dst_tensor->mutable_data(place, ref_var.DataType()); + dst_tensor->mutable_data( + place, framework::TransToPtenDataType(ref_var.DataType())); } pten::funcs::set_constant(*dev_ctx, dst_tensor, value); } @@ -739,7 +742,8 @@ PartialGradTask::PartialGradTask( platform::errors::InvalidArgument( "The %d-th grad_output's shape does not match the %d-th output", i, i)); - PADDLE_ENFORCE_EQ(grad_tensor.type(), out_tensor.type(), + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(grad_tensor.dtype()), + framework::TransToProtoVarType(out_tensor.dtype()), platform::errors::InvalidArgument( "The %d-th grad_output's data type does not " "match the %d-th output", diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index b3b3725ba993e..d5dc53196dd7f 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -29,6 +29,7 @@ #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/var_helper.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/selected_rows.h" @@ -425,7 +426,7 @@ void BuildDygraphPtenKernelContext( kernel_ctx->EmplaceBackAttr(BOOST_GET_CONST(std::string, attr)); } else if (attr_defs[i].type_index == std::type_index(typeid(pten::DataType))) { - auto data_type = pten::TransToPtenDataType( + auto data_type = framework::TransToPtenDataType( static_cast( BOOST_GET_CONST(int, attr))); kernel_ctx->EmplaceBackAttr(data_type); diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index 361b9eb0fe64f..c8b5b452453fc 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -446,7 +446,7 @@ void Reducer::InitializeGroups( 
InitializeDenseGroups(variable_indices_, &group); auto tensor = group.dense_contents_.GetMutable(); tensor->Resize(framework::make_ddim({group.all_length_})) - .mutable_data(place_, group.dtype_); + .mutable_data(place_, framework::TransToPtenDataType(group.dtype_)); } // map variables to this group by VariableLocator @@ -737,7 +737,8 @@ void Reducer::MarkVarReady(const size_t var_index, const bool is_used_var) { // by avoiding tensor construction if (!group_tensor.IsInitialized()) { group_tensor.Resize({static_cast(length)}); - group_tensor.mutable_data(place_, group.dtype_); + group_tensor.mutable_data(place_, + framework::TransToPtenDataType(group.dtype_)); } #ifdef PADDLE_WITH_XPU_BKCL diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index e91b0b0a7770e..584f8ead3d8de 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -17,6 +17,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/memory/memcpy.h" @@ -224,8 +225,10 @@ static bool IsEqualVar(const framework::Variable& var1, auto* t1_p = t1.data(); auto* t2_p = t2.data(); - return std::memcmp(t1_p, t2_p, - t1.numel() * framework::SizeOfType(t1.type())) == 0; + return std::memcmp( + t1_p, t2_p, + t1.numel() * framework::SizeOfType( + framework::TransToProtoVarType(t1.dtype()))) == 0; } template diff --git a/paddle/fluid/imperative/tests/test_group.cc b/paddle/fluid/imperative/tests/test_group.cc index 0c058038968be..b4a5ff90fe609 100644 --- a/paddle/fluid/imperative/tests/test_group.cc +++ b/paddle/fluid/imperative/tests/test_group.cc @@ -86,7 +86,7 @@ void GroupConcatSplit(Place place, size_t size) { tmp.ShareDataWith(*tensor).Resize({static_cast(len)}); group.dense_tensors_.push_back(std::move(tmp)); group.all_length_ += len; - group.dtype_ = tensor->type(); + group.dtype_ = framework::TransToProtoVarType(tensor->dtype()); } paddle::platform::DeviceContextPool& pool = @@ -96,7 +96,7 @@ void GroupConcatSplit(Place place, size_t size) { { // concat auto* tensor = group.dense_contents_.GetMutable(); tensor->Resize(framework::make_ddim({group.all_length_})) - .mutable_data(place, group.dtype_); + .mutable_data(place, framework::TransToPtenDataType(group.dtype_)); group.ConcatTensors(*dev_ctx); group.DivNRanks(*dev_ctx, 1); diff --git a/paddle/fluid/imperative/var_helper.cc b/paddle/fluid/imperative/var_helper.cc index 4686aef5afdf7..3548f2eeafd24 100644 --- a/paddle/fluid/imperative/var_helper.cc +++ b/paddle/fluid/imperative/var_helper.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/imperative/var_helper.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -170,9 +171,11 @@ template <> framework::proto::VarType::Type GetDataType( std::shared_ptr var) { if (var->Var().IsType()) { - return var->Var().Get().value().type(); + return framework::TransToProtoVarType( + var->Var().Get().value().type()); } else if (var->Var().IsType()) { - return var->Var().Get().type(); + return framework::TransToProtoVarType( + var->Var().Get().type()); } else { PADDLE_THROW(paddle::platform::errors::PermissionDenied( "We only support 
pten::SelectedRows and framework::LoDTensor in " diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h index bd96cd3f1aa17..700b859b0b7fa 100644 --- a/paddle/fluid/imperative/variable_wrapper.h +++ b/paddle/fluid/imperative/variable_wrapper.h @@ -19,6 +19,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/framework/variable.h" @@ -169,7 +170,7 @@ class VariableWrapper { } } if (tensor && tensor->IsInitialized()) { - return tensor->type(); + return framework::TransToProtoVarType(tensor->dtype()); } else { VLOG(6) << "The tensor of variable " << name_ << " is not initialized"; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 694e84ad756b5..caac973d8b89a 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -551,7 +551,7 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto &fetch = BOOST_GET(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(outputs->at(i)); output->name = fetches_[idx]->Input("X")[0]; if (type == framework::proto::VarType::FP32) { diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index d1f49b84f0679..c6de967afb71d 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -327,7 +327,7 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, framework::FetchType &fetch_var = framework::GetFetchVariable(*scope, "fetch", idx); auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var); - auto type = fetch.type(); + auto type = framework::TransToProtoVarType(fetch.dtype()); auto output = &(outputs->at(i)); output->name = fetchs_[idx]->Input("X")[0]; if (type == framework::DataTypeTrait::DataType()) { diff --git a/paddle/fluid/inference/api/api_impl_tester.cc b/paddle/fluid/inference/api/api_impl_tester.cc index 124279d246093..41a022cd164f9 100644 --- a/paddle/fluid/inference/api/api_impl_tester.cc +++ b/paddle/fluid/inference/api/api_impl_tester.cc @@ -18,6 +18,7 @@ limitations under the License. 
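GetDataType in var_helper.cc above keeps its framework::proto::VarType::Type return type and converts only at the boundary, for both SelectedRows and LoDTensor variables. A minimal free-function sketch of the same dispatch; VarProtoDtype is an illustrative name and the unsupported-type error path is omitted:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/lod_tensor.h"
    #include "paddle/fluid/framework/variable.h"
    #include "paddle/pten/core/selected_rows.h"

    // Map the inner pten dtype back to the legacy proto enum, whichever
    // container the variable holds.
    paddle::framework::proto::VarType::Type VarProtoDtype(
        const paddle::framework::Variable& var) {
      if (var.IsType<pten::SelectedRows>()) {
        return paddle::framework::TransToProtoVarType(
            var.Get<pten::SelectedRows>().value().dtype());
      }
      return paddle::framework::TransToProtoVarType(
          var.Get<paddle::framework::LoDTensor>().dtype());
    }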
*/ #include // NOLINT #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -36,13 +37,16 @@ namespace paddle { PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { PaddleTensor pt; - if (t->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::INT64) { pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); pt.dtype = PaddleDType::INT64; - } else if (t->type() == framework::proto::VarType::FP32) { + } else if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::FP32) { pt.data.Reset(t->data(), t->numel() * sizeof(float)); pt.dtype = PaddleDType::FLOAT32; - } else if (t->type() == framework::proto::VarType::INT32) { + } else if (framework::TransToProtoVarType(t->dtype()) == + framework::proto::VarType::INT32) { pt.data.Reset(t->data(), t->numel() * sizeof(int32_t)); pt.dtype = PaddleDType::INT32; } else { diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 7fe980bf40641..5dc05711807e6 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" @@ -122,7 +123,7 @@ T *Tensor::data(PlaceType *place, int *size) const { DataType Tensor::type() const { EAGER_GET_TENSOR(paddle::framework::LoDTensor); - auto type = tensor->type(); + auto type = paddle::framework::TransToProtoVarType(tensor->dtype()); if (type == paddle::framework::proto::VarType::FP32) { return DataType::FLOAT32; } else if (type == paddle::framework::proto::VarType::FP16) { diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index c4e195b6ec8fa..12d7f78e169cc 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -172,7 +172,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, snprintf(output_i.name, output_names[i].length() + 1, "%s", output_names[i].c_str()); auto output_t = predictor->GetOutputTensor(output_names[i]); - output_i.dtype = ConvertToPDDataType(output_t->type()); + output_i.dtype = + ConvertToPDDataType(framework::TransToProtoVarType(output_t->dtype())); std::vector output_shape = output_t->shape(); output_i.shape = new int[output_shape.size()]; memmove(output_i.shape, output_shape.data(), @@ -256,7 +257,8 @@ void PD_SetZeroCopyInput(PD_Predictor* predictor, void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { auto output = predictor->predictor->GetOutputTensor(tensor->name); - tensor->dtype = ConvertToPDDataType(output->type()); + tensor->dtype = + ConvertToPDDataType(framework::TransToProtoVarType(output->dtype())); auto shape = output->shape(); size_t shape_size = shape.size(); if (tensor->shape.capacity < shape_size * sizeof(int)) { @@ -271,7 +273,8 @@ void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) { int n = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - size_t length = n * paddle::PaddleDtypeSize(output->type()); + size_t 
length = n * paddle::PaddleDtypeSize( + framework::TransToProtoVarType(output->dtype())); if (tensor->data.capacity < length) { if (tensor->data.data) { std::free(tensor->data.data); diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc index 27e3417933806..22d4476f37e89 100644 --- a/paddle/fluid/inference/lite/tensor_utils.cc +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -16,6 +16,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/memory/allocation/allocator.h" @@ -185,9 +186,11 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, // the input tensor. constexpr int empty_size = 0; dst->Resize({empty_size}); - GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), - GetLiteTargetType(src.place())); - dst->SetPrecision(GetLitePrecisionType(src.type())); + GetLiteTensorDataPtr( + dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())), + GetLiteTargetType(src.place())); + dst->SetPrecision( + GetLitePrecisionType(framework::TransToProtoVarType(src.dtype()))); paddle::lite::LoD lite_lod; SetLoD(&lite_lod, src.lod()); dst->SetLoD(lite_lod); @@ -195,8 +198,9 @@ void InitDstTensor(paddle::lite_api::Tensor* dst, void InitDstTensor(framework::LoDTensor* dst, const paddle::lite_api::Tensor& src) { - dst->mutable_data(inference::lite::utils::GetNativePlace(src.target()), - GetNativePrecisionType(src.precision())); + dst->mutable_data( + inference::lite::utils::GetNativePlace(src.target()), + framework::TransToPtenDataType(GetNativePrecisionType(src.precision()))); SetLoD(dst->mutable_lod(), src.lod()); } @@ -208,14 +212,16 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst, const platform::Place& src_place = src.place(); const platform::Place& dst_place = GetNativePlace(dst->target()); const size_t bytes = - static_cast(src.numel()) * framework::SizeOfType(src.type()); + static_cast(src.numel()) * framework::DataTypeSize(src.dtype()); dst->Resize(framework::vectorize(src.dims())); const void* src_data = src.data(); void* dst_data{nullptr}; - dst_data = GetLiteTensorDataPtr(dst, GetLitePrecisionType(src.type()), - GetLiteTargetType(src.place())); + dst_data = GetLiteTensorDataPtr( + dst, GetLitePrecisionType(framework::TransToProtoVarType(src.dtype())), + GetLiteTargetType(src.place())); VLOG(3) << "[CopyAsync fluid -> lite] Bytes = " << bytes << ", src = " << &src - << ", dst = " << dst << ", src_type = " << src.type(); + << ", dst = " << dst + << ", src_type = " << framework::TransToProtoVarType(src.dtype()); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); VLOG(3) << "[Lite memory size] Bytes = " << bytes; } @@ -229,12 +235,13 @@ void TensorCopyAsync(framework::LoDTensor* dst, const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& dst_place = dst->place(); int64_t src_numel = GetLiteTensorNumel(src); - const size_t bytes = src_numel * framework::SizeOfType(dst->type()); + const size_t bytes = src_numel * framework::DataTypeSize(dst->dtype()); const void* src_data = src.data(); // When Lite is ready, the source type needs to be modified here. 
- void* dst_data = dst->mutable_data(dst_place, dst->type()); + void* dst_data = dst->mutable_data(dst_place, dst->dtype()); VLOG(3) << "[CopyAsync lite -> fluid] Bytes = " << bytes << ", src = " << &src - << ", dst = " << dst << ", src_type = " << dst->type(); + << ", dst = " << dst + << ", src_type = " << framework::TransToProtoVarType(dst->dtype()); MemoryCopyAsync(dst_place, dst_data, src_place, src_data, bytes, ctx); VLOG(3) << "[Lite memory size] Bytes = " << bytes; } @@ -244,7 +251,8 @@ void TensorDataShare(paddle::lite_api::Tensor* dst, framework::LoDTensor* src) { dst->Resize(framework::vectorize(src->dims())); dst->ShareExternalMemory(src->data(), src->memory_size(), GetLiteTargetType(src->place())); - dst->SetPrecision(GetLitePrecisionType(src->type())); + dst->SetPrecision( + GetLitePrecisionType(framework::TransToProtoVarType(src->dtype()))); paddle::lite::LoD lite_lod; SetLoD(&lite_lod, src->lod()); dst->SetLoD(lite_lod); @@ -261,7 +269,9 @@ void TensorDataShare(framework::LoDTensor* dst, paddle::lite_api::Tensor* src) { src_raw_data, memory_size, GetNativePlace(src->target()))); dst->Resize(paddle::framework::make_ddim(src->shape())); SetLoD(dst->mutable_lod(), src->lod()); - dst->ResetHolderWithType(holder, GetNativePrecisionType(src->precision())); + dst->ResetHolderWithType( + holder, + framework::TransToPtenDataType(GetNativePrecisionType(src->precision()))); } } // namespace utils diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 01953bd721f3e..0757f5c505c61 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -1020,7 +1020,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, } for (size_t i = 0; i < a_size; i++) { - if (a.type() == VarType::FP32) { + if (framework::TransToProtoVarType(a.dtype()) == VarType::FP32) { const auto *a_data = a.data(); const auto *b_data = b.data(); if (std::abs(a_data[i] - b_data[i]) > 1e-3) { @@ -1029,7 +1029,7 @@ static bool CompareTensorData(const framework::LoDTensor &a, b_data[i]); return false; } - } else if (a.type() == VarType::INT64) { + } else if (framework::TransToProtoVarType(a.dtype()) == VarType::INT64) { const auto *a_data = a.data(); const auto *b_data = b.data(); if (std::abs(a_data[i] - b_data[i]) > 1e-3) { diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc index 6583ad0a4952c..5a09933e0ee24 100644 --- a/paddle/fluid/operators/abs_op.cc +++ b/paddle/fluid/operators/abs_op.cc @@ -140,8 +140,9 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, const framework::Tensor& tensor, const framework::OpKernelType& expected_kernel_type) const { - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } }; diff --git a/paddle/fluid/operators/activation_op_mlu.cc b/paddle/fluid/operators/activation_op_mlu.cc index caa498faddaa1..a2ac1e7d42d45 100644 --- a/paddle/fluid/operators/activation_op_mlu.cc +++ b/paddle/fluid/operators/activation_op_mlu.cc @@ -38,10 +38,12 @@ class ActivationMLUKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); MLUCnnlActivationDesc act_desc(act_mode, alpha); - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc 
output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnl::Active(ctx, act_desc.get(), input_desc.get(), reinterpret_cast(input->data()), @@ -61,12 +63,15 @@ class ActivationGradMLUKernel : public framework::OpKernel { dx->mutable_data(ctx.GetPlace()); - MLUCnnlTensorDesc dout_desc(*dout, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(dout->type())); - MLUCnnlTensorDesc out_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); - MLUCnnlTensorDesc dx_desc(*dx, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(dx->type())); + MLUCnnlTensorDesc dout_desc( + *dout, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(dout->dtype()))); + MLUCnnlTensorDesc out_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); + MLUCnnlTensorDesc dx_desc( + *dx, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(dx->dtype()))); MLUCnnlActivationDesc act_desc(act_mode, alpha); MLUCnnl::ActiveGrad( ctx, act_desc.get(), nullptr, nullptr, nullptr, nullptr, diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 3d8a1c74cd663..344eb2348ab3c 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -76,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel { // Step 2: Construct a broadcast factor, which has the same shape with x. // 2.1 Get a factor tensor with shape [1]. - Tensor factor_tensor(framework::proto::VarType::FP32); + Tensor factor_tensor(experimental::DataType::FLOAT32); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, factor); // 2.2 Get the factor which has the shape with x and the same value with // factor. 
- Tensor factor_bc_tensor(framework::proto::VarType::FP32); + Tensor factor_bc_tensor(experimental::DataType::FLOAT32); factor_bc_tensor.mutable_data(x_dims, place); const auto& runner_bc = NpuOpRunner("FillD", {factor_tensor}, {factor_bc_tensor}, @@ -659,14 +659,15 @@ class HardSwishGradNPUKernel : public framework::OpKernel { {{"dims", framework::vectorize(x->dims())}}); runner_fill.Run(stream); - Tensor tmp_bool(framework::proto::VarType::BOOL); + Tensor tmp_bool(experimental::DataType::BOOL); tmp_bool.mutable_data(x->dims(), place); const auto& runner_less = NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool}); runner_less.Run(stream); Tensor tmp4(x->type()); tmp4.mutable_data(x->dims(), place); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast = NpuOpRunner("Cast", {tmp_bool}, {tmp4}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/allclose_op.h b/paddle/fluid/operators/allclose_op.h index b5683a1d9a93c..7a36754194ace 100644 --- a/paddle/fluid/operators/allclose_op.h +++ b/paddle/fluid/operators/allclose_op.h @@ -59,10 +59,13 @@ class AllcloseKernel : public framework::OpKernel { rtol->numel(), 1, platform::errors::InvalidArgument( "Input(Rtol) size must be 1, but get %d.", rtol->numel())); - PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Rtol) type must be double, but get %s.", - framework::DataTypeToString(rtol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(rtol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Rtol) type must be double, but get %s.", + framework::DataTypeToString( + framework::TransToProtoVarType(rtol->dtype())))); rtol_v = get_tensor_value(dev_ctx, *rtol); } if (ctx.HasInput("Atol")) { @@ -71,10 +74,13 @@ class AllcloseKernel : public framework::OpKernel { atol->numel(), 1, platform::errors::InvalidArgument( "Input(Atol) size must be 1, but get %d", atol->numel())); - PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Atol) type must be double, but get %s", - framework::DataTypeToString(atol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(atol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Atol) type must be double, but get %s", + framework::DataTypeToString( + framework::TransToProtoVarType(atol->dtype())))); atol_v = get_tensor_value(dev_ctx, *atol); } diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc index 0a710dd842fd4..1393da7dd57a7 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc @@ -42,7 +42,7 @@ void Update(const platform::NPUDeviceContext& ctx, platform::NPUMemsetAsync(static_cast(g), 0, good_out_tensor->numel() * sizeof(int), stream); // bad_out_data = bad_in_data + 1 - Tensor factor_tensor(bad_out_tensor->type()); + Tensor factor_tensor(bad_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner("Add", {*bad_in_tensor, factor_tensor}, @@ -91,7 +91,7 @@ void Update(const platform::NPUDeviceContext& ctx, bad_out_tensor->numel() * sizeof(int), stream); // good_out_data = good_in_data + 1 - Tensor 
factor_tensor(good_out_tensor->type()); + Tensor factor_tensor(good_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner("Add", {*good_in_tensor, factor_tensor}, @@ -188,7 +188,7 @@ class LazyZerosNPU { if (!found_inf_vec[0]) { framework::TensorCopy(*x, place, dev_ctx, out); } else if (zero_ptr != dst_ptr) { - auto size = out->numel() * framework::SizeOfType(out->type()); + auto size = out->numel() * framework::DataTypeSize(out->dtype()); memory::Copy(place, dst_ptr, place, zero_ptr, size, stream); } } diff --git a/paddle/fluid/operators/argsort_op_npu.cc b/paddle/fluid/operators/argsort_op_npu.cc index b7290232e87be..a632b4df5dd79 100644 --- a/paddle/fluid/operators/argsort_op_npu.cc +++ b/paddle/fluid/operators/argsort_op_npu.cc @@ -75,15 +75,16 @@ class ArgsortNPUKernel : public framework::OpKernel { framework::NPUAttributeMap attr = {{"axis", -1}, {"descending", descending}}; - Tensor indices_tmp(framework::proto::VarType::INT32); + Tensor indices_tmp(experimental::DataType::INT32); indices_tmp.Resize(indices->dims()); - if (input->type() == framework::proto::VarType::INT64) { - Tensor input_fp32(framework::proto::VarType::FP32); + if (framework::TransToProtoVarType(input->dtype()) == + framework::proto::VarType::INT64) { + Tensor input_fp32(experimental::DataType::FLOAT32); input_fp32.Resize(input->dims()); CastToFP32(ctx, stream, *input, &input_fp32); - Tensor output_fp32(framework::proto::VarType::FP32); + Tensor output_fp32(experimental::DataType::FLOAT32); output_fp32.Resize(output->dims()); if (axis == -1 || axis + 1 == in_dims.size()) { @@ -112,7 +113,7 @@ class ArgsortNPUKernel : public framework::OpKernel { TranposeNPU(ctx, stream, &perm, input_fp32, &trans_input); Tensor trans_output(input_fp32.type()); - Tensor trans_indices(framework::proto::VarType::INT32); + Tensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); @@ -150,7 +151,7 @@ class ArgsortNPUKernel : public framework::OpKernel { TranposeNPU(ctx, stream, &perm, *input, &trans_input); Tensor trans_output(input->type()); - Tensor trans_indices(framework::proto::VarType::INT32); + Tensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index a959067ddba62..f1e633283d9cc 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -66,7 +66,8 @@ struct ArrayToLoDFunctor : public boost::static_visitor { ArrayToLoDFunctorImpl functor; functor.dev_ctx_ = dev_ctx; functor.prev_functor_ = this; - framework::VisitDataType(out->type(), functor); + framework::VisitDataType(framework::TransToProtoVarType(out->dtype()), + functor); } }; @@ -101,7 +102,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { "There's no element in the input array.")); int rank = x[0].dims().size(); platform::Place place = x[0].place(); - auto data_type = x[0].type(); + auto data_type = x[0].dtype(); int64_t batch_size = x[0].dims()[0]; framework::DDim ins_dims = rank > 1 ? 
framework::slice_ddim(x[0].dims(), 1, rank) @@ -124,12 +125,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { "The current place is %d, and the previous place is %d.", i, x[i].place(), place)); PADDLE_ENFORCE_EQ( - x[i].type(), data_type, + x[i].dtype(), data_type, platform::errors::InvalidArgument( "The date type of the %zu'th element in LoDTensorArray " "differs from previous ones." "The current data type is %d, and the previous data type is %d.", - i, x[i].type(), data_type)); + i, x[i].dtype(), data_type)); batch_size += x[i].dims()[0]; } auto ins_dim_vec = framework::vectorize(ins_dims); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 8e960ff89bf51..57da23ba951ad 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -151,15 +151,19 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( bn_param_type = framework::proto::VarType::FP64; } PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Scale")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Bias")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Mean")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), platform::errors::InvalidArgument( "Variance input should be of float type")); diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index 1590eed0bb2b0..3fae65c50177b 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
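The NPU kernels above (activation_op_npu.cc, argsort_op_npu.cc) construct temporary tensors from experimental::DataType values rather than proto::VarType enums. A reduced sketch of that idiom; MakeFp32Scratch is a hypothetical helper and the {1} shape is a placeholder:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/place.h"

    // Scratch tensors are typed with the pten/experimental enum directly;
    // proto::VarType::FP32 is no longer passed to the Tensor constructor.
    void MakeFp32Scratch(const paddle::platform::Place& place) {
      paddle::framework::Tensor scratch(paddle::experimental::DataType::FLOAT32);
      scratch.mutable_data<float>({1}, place);  // placeholder shape
    }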
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/platform/device_context.h" @@ -192,7 +193,7 @@ class BeamSearchDecodeOp : public framework::OperatorBase { LoDTensor* sentenceScores = ctx.Output("SentenceScores"); framework::VisitDataType( - scores->at(0).type(), + framework::TransToProtoVarType(scores->at(0).dtype()), BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores, beam_size, end_id)); } diff --git a/paddle/fluid/operators/bincount_op.cu b/paddle/fluid/operators/bincount_op.cu index dd7804625a77c..c318737ae962d 100644 --- a/paddle/fluid/operators/bincount_op.cu +++ b/paddle/fluid/operators/bincount_op.cu @@ -112,7 +112,7 @@ void BincountCUDAInner(const framework::ExecutionContext& context) { PADDLE_CUDA_NUM_THREADS, 0, stream>>>( input_data, input_numel, has_weights, weights_data, output_data); } else { - const auto& weights_type = weights->type(); + const auto& weights_type = framework::TransToProtoVarType(weights->dtype()); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); @@ -141,7 +141,7 @@ class BincountCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("X"); - const auto& input_type = input->type(); + const auto& input_type = framework::TransToProtoVarType(input->dtype()); if (input_type == framework::proto::VarType::INT32) { BincountCUDAInner(context); diff --git a/paddle/fluid/operators/bincount_op.h b/paddle/fluid/operators/bincount_op.h index 3f4334099e277..60d4dffe3bf98 100644 --- a/paddle/fluid/operators/bincount_op.h +++ b/paddle/fluid/operators/bincount_op.h @@ -61,7 +61,7 @@ void BincountInner(const framework::ExecutionContext& context) { if (has_weights) { const T* weights_data = weights->data(); - const auto& weights_type = weights->type(); + const auto& weights_type = framework::TransToProtoVarType(weights->dtype()); if (weights_type == framework::proto::VarType::FP32) { float* output_data = output->mutable_data(context.GetPlace()); pten::funcs::SetConstant()( @@ -95,7 +95,7 @@ class BincountKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* input = context.Input("X"); - const auto& input_type = input->type(); + const auto& input_type = framework::TransToProtoVarType(input->dtype()); if (input_type == framework::proto::VarType::INT32) { BincountInner(context); diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index 0c49dbe7e0f49..4ca0dded3e738 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
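The index-type dispatch in the bincount kernels above branches on the proto enum recovered once from dtype(). A minimal sketch of that dispatch shape; DispatchOnIndexType and RunTyped are placeholders for the real kernel bodies:

    #include "paddle/fluid/framework/convert_utils.h"
    #include "paddle/fluid/framework/tensor.h"
    #include "paddle/fluid/platform/enforce.h"

    // Stand-in for the typed INT32/INT64 code path of the real kernel.
    template <typename IndexT>
    void RunTyped(const paddle::framework::Tensor& ids) { (void)ids; }

    void DispatchOnIndexType(const paddle::framework::Tensor& ids) {
      const auto index_type =
          paddle::framework::TransToProtoVarType(ids.dtype());
      if (index_type == paddle::framework::proto::VarType::INT32) {
        RunTyped<int32_t>(ids);
      } else if (index_type == paddle::framework::proto::VarType::INT64) {
        RunTyped<int64_t>(ids);
      } else {
        PADDLE_THROW(paddle::platform::errors::InvalidArgument(
            "The index dtype must be int32 or int64."));
      }
    }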
*/ #include "paddle/fluid/operators/cast_op.h" #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/float16.h" #ifdef PADDLE_WITH_MLU @@ -82,7 +83,9 @@ class CastOp : public framework::OperatorWithKernel { auto &tensor_place = tensor->place(); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(tensor_place)) { - return framework::OpKernelType(tensor->type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), + ctx.device_context()); } #ifdef PADDLE_WITH_MKLDNN @@ -100,26 +103,32 @@ class CastOp : public framework::OperatorWithKernel { return true; }; - if (this->CanMKLDNNBeUsed(ctx, tensor->type()) && MKLDNNSupportsCast()) { - return framework::OpKernelType(tensor->type(), ctx.GetPlace(), - framework::DataLayout::kMKLDNN, - framework::LibraryType::kMKLDNN); + if (this->CanMKLDNNBeUsed( + ctx, framework::TransToProtoVarType(tensor->dtype())) && + MKLDNNSupportsCast()) { + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), ctx.GetPlace(), + framework::DataLayout::kMKLDNN, framework::LibraryType::kMKLDNN); } #endif #ifdef PADDLE_WITH_MLU auto src_type = static_cast(ctx.Attr("in_dtype")); auto dst_type = static_cast(ctx.Attr("out_dtype")); if (src_type == dst_type || MLUSupportsCast(src_type, dst_type)) { - return framework::OpKernelType(tensor->type(), tensor_place); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), tensor_place); } else { VLOG(3) << "MLU not support cast type: " << framework::DataTypeToString(src_type) << " to type: " << framework::DataTypeToString(dst_type) << ", fallbacking to CPU one!"; - return framework::OpKernelType(tensor->type(), platform::CPUPlace()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), + platform::CPUPlace()); } #endif - return framework::OpKernelType(tensor->type(), tensor_place); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor->dtype()), tensor_place); } }; diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h index 9bf08d072da0f..ee2b280f5ab86 100644 --- a/paddle/fluid/operators/cast_op.h +++ b/paddle/fluid/operators/cast_op.h @@ -63,7 +63,7 @@ class CastOpKernel : public framework::OpKernel { out->mutable_data(dev_ctx.GetPlace(), static_cast(out_dtype)); - auto pt_out_dtype = pten::TransToPtenDataType( + auto pt_out_dtype = framework::TransToPtenDataType( static_cast(out_dtype)); // call new kernel diff --git a/paddle/fluid/operators/cast_op_npu.cc b/paddle/fluid/operators/cast_op_npu.cc index 060bcb78e4a3a..8879a226d29d2 100644 --- a/paddle/fluid/operators/cast_op_npu.cc +++ b/paddle/fluid/operators/cast_op_npu.cc @@ -43,7 +43,7 @@ class CastNPUKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); - if (x->type() == dtype) { + if (framework::TransToProtoVarType(x->dtype()) == dtype) { // NOTE(zhiqiu): NPU cast op may result in wrong value, so // add special case here. VLOG(4) << "cast to same dtype:" << dtype; diff --git a/paddle/fluid/operators/cast_op_xpu.cc b/paddle/fluid/operators/cast_op_xpu.cc index 0ec5ca707a6b5..3337afcd3cb08 100644 --- a/paddle/fluid/operators/cast_op_xpu.cc +++ b/paddle/fluid/operators/cast_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/cast_op.h" #include "paddle/fluid/platform/float16.h" @@ -45,7 +46,7 @@ class CastXPUKernel : public framework::OpKernel { out->mutable_data(dev_ctx.GetPlace(), static_cast(out_dtype)); - auto pt_out_dtype = pten::TransToPtenDataType( + auto pt_out_dtype = framework::TransToPtenDataType( static_cast(out_dtype)); // call pten kernel pten::CastKernel( diff --git a/paddle/fluid/operators/class_center_sample_op.cu b/paddle/fluid/operators/class_center_sample_op.cu index 2d7800d9997fc..92bf20d6cf95d 100644 --- a/paddle/fluid/operators/class_center_sample_op.cu +++ b/paddle/fluid/operators/class_center_sample_op.cu @@ -338,8 +338,9 @@ class ClassCenterSampleCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device.numel(), - platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum, - comm->comm(), calcu_stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(num_classes_per_device.dtype())), + ncclSum, comm->comm(), calcu_stream)); } #endif diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index d71d6fc39b119..67e668cc70d7c 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -23,6 +23,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -53,7 +54,7 @@ struct FillConstantVisitor { * = nullptr) const { #ifdef PADDLE_WITH_ASCEND_CL if (platform::is_npu_place(dev_ctx_.GetPlace())) { - Tensor tensor_tmp(dtype_); + Tensor tensor_tmp(framework::TransToPtenDataType(dtype_)); tensor_tmp.mutable_data({1}, context_.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, static_cast(value_)); @@ -193,7 +194,8 @@ class CoalesceTensorOpKernel : public framework::OpKernel { void *fused_tensor_ptr = fused_tensor ->Resize(framework::make_ddim({static_cast(numel)})) - .mutable_data(context.GetPlace(), dtype); + .mutable_data(context.GetPlace(), + framework::TransToPtenDataType(dtype)); VLOG(10) << "Fused tensor addr " << fused_tensor_ptr; // Init the continuous space diff --git a/paddle/fluid/operators/collective/allreduce_op.h b/paddle/fluid/operators/collective/allreduce_op.h index 314a91841bebf..4a35d59c6f3e6 100644 --- a/paddle/fluid/operators/collective/allreduce_op.h +++ b/paddle/fluid/operators/collective/allreduce_op.h @@ -41,7 +41,8 @@ class AllReduceOpKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - int dtype = platform::ToNCCLDataType(in->type()); + int dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); auto* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index 02b10f17da5a3..26fdee200cd84 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -31,7 +31,8 @@ class AllToAllOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int send_numel = x->numel(); - ncclDataType_t dtype = 
platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc b/paddle/fluid/operators/collective/barrier_op.cu.cc index a98a0bf6ab4a9..c59d8315a369e 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -31,7 +31,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); void* recvbuff = out->mutable_data(place); diff --git a/paddle/fluid/operators/collective/broadcast_op.cu.cc b/paddle/fluid/operators/collective/broadcast_op.cu.cc index 04d028536a9b2..0e14b484e766f 100644 --- a/paddle/fluid/operators/collective/broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/broadcast_op.cu.cc @@ -17,6 +17,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace ops = paddle::operators; namespace plat = paddle::platform; @@ -56,7 +57,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( send_recv_buffer, static_cast(in->numel()), - platform::ToNCCLDataType(in->type()), root_dev_id, comm, stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())), + root_dev_id, comm, stream)); VLOG(3) << "Bcast " << ctx.InputNames("X")[0] << ", (" << in->numel() << ")" << " From " << root_dev_id << " to " << dev_id; diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index f174473c049ec..89854999c16fc 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -29,7 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int nranks = ctx.Attr("nranks"); int rid = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu.cc b/paddle/fluid/operators/collective/c_allgather_op_npu.cc index 5ebcc9064f790..5339293da0fe2 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu.cc @@ -31,7 +31,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel { #if defined(PADDLE_WITH_ASCEND_CL) auto in = ctx.Input("X"); auto out = ctx.Output("Out"); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int ring_id = ctx.Attr("ring_id"); std::string group = diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 0e4210ea7304a..a04935d43eb2d 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -173,7 +173,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); void* sendbuff = reinterpret_cast(const_cast(in->data())); @@ -231,7 +232,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { bool found_nan = false; - auto d_type = in->type(); + auto d_type = framework::TransToProtoVarType(in->dtype()); switch (d_type) { case framework::proto::VarType::FP16: { break; @@ -284,7 +285,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - BKCLDataType dtype = platform::ToBKCLDataType(in->type()); + BKCLDataType dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); @@ -346,7 +348,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 6deb837069761..fa4af87ff10e1 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -30,7 +30,8 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t 
dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int rid = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc index 0860da1362538..70032ac28a046 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc @@ -30,7 +30,8 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel { auto x = ctx.Input("X"); void* ptr = reinterpret_cast(const_cast(x->data())); int numel = x->numel(); - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); auto out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index 738ed16286131..db5a2317a2d81 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -31,7 +31,8 @@ class CConcatOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int nranks = ctx.Attr("nranks"); int rank = ctx.Attr("rank"); diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu index 9b343b34a3e51..ba50f5c48bd40 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cu +++ b/paddle/fluid/operators/collective/c_embedding_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/collective/c_embedding_op.h" @@ -95,7 +96,7 @@ class CEmbeddingCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(limit); int threads = kNumCUDAThreads; - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { CEmbedding<<>>( output, table, ids_t->data(), K, D, N, start_idx, end_idx, @@ -138,7 +139,7 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel { auto t = framework::EigenVector::Flatten(*d_table_t); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast(0)); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { CEmbeddingGrad<<>>( d_table, d_output, ids_t->data(), K, D, N, start_idx, diff --git a/paddle/fluid/operators/collective/c_embedding_op.h b/paddle/fluid/operators/collective/c_embedding_op.h index 1d410847dd798..6bf99d59fa4a5 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.h +++ b/paddle/fluid/operators/collective/c_embedding_op.h @@ -66,7 +66,7 @@ class CEmbeddingOpCPUKernel : public framework::OpKernel { const int64_t height = table_t->dims()[0]; const int64_t width = table_t->dims()[1]; - const auto& index_type = ids_t->type(); + const auto& index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { GetIdsEmbedding(ids_t->data(), ids_t->numel(), start_idx, table_data, height, width, output_data); @@ -110,9 +110,11 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel { table_grad_t->mutable_data(table_t->dims(), context.GetPlace()); size_t table_t_mem_size = - table_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_t->numel() * framework::DataTypeSize(table_grad_t->dtype()); size_t table_grad_t_mem_size = - table_grad_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_grad_t->numel() * + framework::SizeOfType( + framework::TransToProtoVarType(table_grad_t->dtype())); VLOG(10) << "table_dims:" << table_t->dims() << ", table_t memory_size:" << table_t_mem_size @@ -125,7 +127,7 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel { const int64_t height = table_t->dims()[0]; const int64_t width = table_t->dims()[1]; - const auto& index_type = ids_t->type(); + const auto& index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { UpdateEmbedding(ids_t->data(), ids_t->numel(), start_idx, table_grad_data, height, width, d_output_data); diff --git a/paddle/fluid/operators/collective/c_embedding_op_npu.cc b/paddle/fluid/operators/collective/c_embedding_op_npu.cc index 3e96f15d5d3dd..7a7015b8faaa3 100644 --- a/paddle/fluid/operators/collective/c_embedding_op_npu.cc +++ b/paddle/fluid/operators/collective/c_embedding_op_npu.cc @@ -124,9 +124,10 @@ void NPUGetIdsEmbedding(const framework::ExecutionContext &context) { framework::make_ddim({table_t->dims()[0] + 1, table_t->dims()[1]}); framework::LoDTensor table_t_pad; - size_t mem_size = table_t->numel() * framework::SizeOfType(table_t->type()); + size_t mem_size = + table_t->numel() * framework::DataTypeSize(table_t->dtype()); size_t line_mem_size = - table_t->dims()[1] * framework::SizeOfType(table_t->type()); + table_t->dims()[1] 
* framework::DataTypeSize(table_t->dtype()); PADDLE_ENFORCE_EQ(line_mem_size % 64, 0, platform::errors::InvalidArgument( "NPU only accept the second dim must align by 64")); @@ -160,7 +161,7 @@ class CEmbeddingNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { auto *ids_t = context.Input("Ids"); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { NPUGetIdsEmbedding(context); } else { @@ -200,7 +201,9 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { uint8_t *pad_data = reinterpret_cast( table_t_pad.mutable_data(pad_shape, context.GetPlace())); size_t table_t_pad_mem_size = - table_t_pad.numel() * framework::SizeOfType(table_t_pad.type()); + table_t_pad.numel() * + framework::SizeOfType( + framework::TransToProtoVarType(table_t_pad.dtype())); platform::NPUMemsetAsync(pad_data, 0, table_t_pad_mem_size, stream, table_t_pad_mem_size); @@ -215,11 +218,11 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { // copy table_t_pad to table_t T *dst = table_grad_t->mutable_data(table_t->dims(), context.GetPlace()); const size_t mem_size = - table_grad_t->numel() * framework::SizeOfType(table_grad_t->type()); + table_grad_t->numel() * framework::DataTypeSize(table_grad_t->dtype()); // check align size_t line_mem_size = - table_grad_t->dims()[1] * framework::SizeOfType(table_grad_t->type()); + table_grad_t->dims()[1] * framework::DataTypeSize(table_grad_t->dtype()); PADDLE_ENFORCE_EQ(line_mem_size % 64, 0, platform::errors::InvalidArgument( "NPU only accept the second dim must align by 64")); @@ -234,7 +237,7 @@ class CEmbeddingGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { auto *ids_t = context.Input("Ids"); - const auto &index_type = ids_t->type(); + const auto &index_type = framework::TransToProtoVarType(ids_t->dtype()); if (index_type == framework::proto::VarType::INT32) { NPUUpdateEmbedding(context); } else { diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index a0e0f8e92bdde..ad798baf3ce91 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -131,7 +132,8 @@ class CReduceOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); void* sendbuff = reinterpret_cast(const_cast(in->data())); @@ -211,7 +213,8 @@ class CReduceOpXPUKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - BKCLDataType dtype = platform::ToBKCLDataType(in->type()); + BKCLDataType dtype = + platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); @@ -274,7 +277,8 @@ class CReduceOpCUDAKernel : public framework::OpKernel { auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int64_t numel = in->numel(); const void* sendbuff = in->data(); out->Resize(in->dims()); diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 141fa760413b3..9b05e940d4f60 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -47,7 +47,8 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { int64_t recv_numel = in->numel() / nranks; const T* send_buff = in->data(); T* recv_buff = out->data(); - int dtype = platform::ToNCCLDataType(in->type()); + int dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); gpuStream_t stream = nullptr; if (ctx.Attr("use_calc_stream")) { diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc index 7241f66174d55..efc94ce1eaef9 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc @@ -51,7 +51,8 @@ class CReduceScatterOpAscendKernel : public framework::OpKernel { void* inputPtr = reinterpret_cast(const_cast(in->data())); void* outputPtr = reinterpret_cast(out->data()); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); aclrtStream stream = nullptr; if (ctx.Attr("use_calc_stream")) { diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index 4d4dc0c12af55..7a964c9c69717 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -30,7 +30,8 @@ class CScatterOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto out = ctx.Output("Out"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int nranks = ctx.Attr("nranks"); int root_id = ctx.Attr("root"); diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu 
b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index 6371d523cfa4a..4f1f1ec651206 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -121,8 +121,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { eigen_logits.maximum(along_axis); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( logits_max_buff, logits_max_buff, logits_max.numel(), - platform::ToNCCLDataType(logits_max.type()), ncclMax, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(logits_max.dtype())), + ncclMax, comm->comm(), stream)); // step 2, obtain logit - logit_max Eigen::DSizes batch_by_one(N, 1); @@ -147,7 +148,7 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); if (label_type == framework::proto::VarType::INT32) { MaskLabelByIndex<<>>( @@ -162,8 +163,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { void* predict_logits_buff = predicted_logits.mutable_data(place); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( predict_logits_buff, predict_logits_buff, predicted_logits.numel(), - platform::ToNCCLDataType(predicted_logits.type()), ncclSum, - comm->comm(), stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(predicted_logits.dtype())), + ncclSum, comm->comm(), stream)); // step 4, obtain exp(logit) eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp(); @@ -180,8 +182,9 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( sum_exp_logits_buff, sum_exp_logits_buff, sum_exp_logits.numel(), - platform::ToNCCLDataType(sum_exp_logits.type()), ncclSum, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(sum_exp_logits.dtype())), + ncclSum, comm->comm(), stream)); auto eigen_loss = math::EigenMatrix::From(loss_2d); auto eigen_predicted_logits = math::EigenMatrix::From(predicted_logits); @@ -225,7 +228,7 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N * D); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); const int start_index = rank * D; const int end_index = start_index + D; diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index e2ff823420aef..0f419a6aa52ab 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -30,8 +31,10 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto local_count = ctx.Input("local_count"); auto global_count = ctx.Input("global_count"); - auto local_count_type = local_count->type(); - auto global_count_type = global_count->type(); + auto local_count_type = + framework::TransToProtoVarType(local_count->dtype()); + auto global_count_type = + framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { PADDLE_THROW(platform::errors::InvalidArgument( "Please use int64 type in local_count.")); @@ -65,7 +68,8 @@ class GlobalGatherOpCUDAKernel : public framework::OpKernel { cpu_global_count_data = cpu_global_count.data(); } - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index c47d27366c5f2..7a42c9abcb352 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -30,8 +31,10 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); auto local_count = ctx.Input("local_count"); auto global_count = ctx.Input("global_count"); - auto local_count_type = local_count->type(); - auto global_count_type = global_count->type(); + auto local_count_type = + framework::TransToProtoVarType(local_count->dtype()); + auto global_count_type = + framework::TransToProtoVarType(global_count->dtype()); if (local_count_type != framework::proto::VarType::INT64) { PADDLE_THROW(platform::errors::InvalidArgument( "Please use int64 type in local_count.")); @@ -63,7 +66,8 @@ class GlobalScatterOpCUDAKernel : public framework::OpKernel { global_count_len = cpu_global_count.numel(); } - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 094847beca214..4299578c173aa 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -30,7 +30,8 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); int64_t numel = in->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(in->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); int nranks = ctx.Attr("nranks"); int rank = ctx.Attr("rank"); diff --git a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc index 
1ac020838a3f8..0314bb7d5de1d 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc @@ -29,7 +29,8 @@ class CallPartialGatherOpASCENDKernel : public framework::OpKernel { auto in = ctx.Input("X"); auto out = ctx.Output("Out"); int64_t numel = in->numel(); - HcclDataType dtype = platform::ToHCCLDataType(in->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); int rank = ctx.Attr("rank"); int ring_id = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/partial_recv_op_npu.cc b/paddle/fluid/operators/collective/partial_recv_op_npu.cc index bbdf14b1bdc00..f14ce5f81f905 100644 --- a/paddle/fluid/operators/collective/partial_recv_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op_npu.cc @@ -35,7 +35,8 @@ class PartialRecvOpASCENDKernel : public framework::OpKernel { void* ptr = reinterpret_cast(const_cast(out->data()) + offset); int numel = recv_numel; - HcclDataType dtype = platform::ToHCCLDataType(out->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(out->dtype())); int ring_id = ctx.Attr("ring_id"); diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index 8a4f7f750a15b..f6b4d85c9d593 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -70,7 +71,8 @@ class PartialSendCUDAKernel : public framework::OpKernel { "be less than comm->nranks (%d).", peer, comm->nranks())); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); int send_numel = numel / num; int offset = send_numel * id; diff --git a/paddle/fluid/operators/collective/partial_send_op_npu.cc b/paddle/fluid/operators/collective/partial_send_op_npu.cc index 1918e6c16ff25..31c74fcc196be 100644 --- a/paddle/fluid/operators/collective/partial_send_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_send_op_npu.cc @@ -33,7 +33,8 @@ class PartialSendOpASCENDKernel : public framework::OpKernel { void* ptr = reinterpret_cast(const_cast(x->data()) + offset); int numel = send_numel; - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu.cc b/paddle/fluid/operators/collective/recv_v2_op_npu.cc index c20aa76ea40d5..6a2244b91025a 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu.cc @@ -31,7 +31,8 @@ class CRecvOpASCENDKernel : public framework::OpKernel { out->mutable_data(out->dims(), ctx.GetPlace()); void* ptr = reinterpret_cast(const_cast(out->data())); int numel = out->numel(); - HcclDataType dtype = platform::ToHCCLDataType(out->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(out->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git 
a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 952fcf2065d59..95eadb728caed 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -61,7 +61,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { VLOG(3) << "LodTensorArray: idx(" << idx << ")"; auto& x = x_array.at(idx); int numel = x.numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x.type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x.dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend( x.data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " send " @@ -72,7 +73,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel { auto x = ctx.Input("X"); int numel = x->numel(); - ncclDataType_t dtype = platform::ToNCCLDataType(x->type()); + ncclDataType_t dtype = + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend( x->data(), numel, dtype, peer, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " send " diff --git a/paddle/fluid/operators/collective/send_v2_op_npu.cc b/paddle/fluid/operators/collective/send_v2_op_npu.cc index 86d86a01354b2..3bc5487371bac 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu.cc @@ -30,7 +30,8 @@ class CSendOpASCENDKernel : public framework::OpKernel { auto x = ctx.Input("X"); void* ptr = reinterpret_cast(const_cast(x->data())); int numel = x->numel(); - HcclDataType dtype = platform::ToHCCLDataType(x->type()); + HcclDataType dtype = + platform::ToHCCLDataType(framework::TransToProtoVarType(x->dtype())); int ring_id = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 2746f0345302f..5d6f5f4d8593d 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -76,7 +76,7 @@ class ConcatOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index 6325d1ccda8c8..1a6e29f3ac392 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -61,13 +61,15 @@ class ConcatMLUKernel : public framework::OpKernel { std::vector desc_vector; for (size_t i = 0; i < ins_size; i++) { input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type()))); + *ins[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(ins[i]->dtype())))); desc_vector.push_back(input_descs.back().get()); inputs.push_back(GetBasePtr(ins[i])); } // init out tensors - MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); + MLUCnnlTensorDesc output_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); // MLU should do sth MLUCnnl::Concat(ctx, ins_size_t, axis_t, desc_vector.data(), inputs.data(), diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc index f961e479ce47c..68720e70b09ad 100644 --- 
a/paddle/fluid/operators/controlflow/conditional_block_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc @@ -242,7 +242,7 @@ class ConditionalBlockGradOp : public ConditionalOp { } VLOG(4) << "Assigning zero to " << outside_tensor; outside_tensor->Resize(input_tensor.dims()); - outside_tensor->mutable_data(place, input_tensor.type()); + outside_tensor->mutable_data(place, input_tensor.dtype()); const platform::DeviceContext *dev_ctx = platform::DeviceContextPool::Instance().Get(place); pten::funcs::set_constant(*dev_ctx, outside_tensor, 0.0f); diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index a44cff555f0a1..c024e4a12cd47 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -65,7 +65,8 @@ class ConditionalOp : public framework::OperatorBase { platform::errors::InvalidArgument( "condition should have one initialized input as condition")); - PADDLE_ENFORCE_EQ(ips[0]->type() == framework::proto::VarType::BOOL && + PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(ips[0]->dtype()) == + framework::proto::VarType::BOOL && ips[0]->numel() == 1, true, platform::errors::InvalidArgument( "condition input's data type should be bool, " diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index 8e46c7acf0918..0727895935525 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -169,7 +169,7 @@ class ReadFromArrayOp : public ArrayOp { auto &fw_var_tensor = fw_var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = fw_var_tensor.type(); + attrs["dtype"] = framework::TransToProtoVarType(fw_var_tensor.dtype()); attrs["shape"] = framework::vectorize(fw_var_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 6fa309f0e2d90..67aabe758810a 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -375,7 +375,8 @@ class WhileGradOp : public framework::OperatorBase { var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = inside_tensor.type(); + attrs["dtype"] = + framework::TransToProtoVarType(inside_tensor.dtype()); attrs["shape"] = framework::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index e500814232aae..d8b54d4f95c0d 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -194,7 +194,8 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( if (input_data_type != framework::proto::VarType::INT8 && input_data_type != framework::proto::VarType::UINT8 && input_data_type != framework::proto::VarType::BF16) { - auto filter_data_type = ctx.Input("Filter")->type(); + auto filter_data_type = + framework::TransToProtoVarType(ctx.Input("Filter")->dtype()); PADDLE_ENFORCE_EQ( input_data_type, filter_data_type, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index fb22765d76ea6..956ba80f32bd1 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -235,8 +235,8 @@ class GemmConvKernel : public framework::OpKernel 
{ const std::string data_format = context.Attr("data_format"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - Tensor transformed_input(input->type()); - Tensor transformed_output(output->type()); + Tensor transformed_input(input->dtype()); + Tensor transformed_output(output->dtype()); if (channel_last) { ResizeToChannelFirst(context, input, @@ -398,8 +398,8 @@ class GemmConvGradKernel : public framework::OpKernel { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); - Tensor transformed_input(input->type()); - Tensor transformed_output_grad(output_grad->type()); + Tensor transformed_input(input->dtype()); + Tensor transformed_output_grad(output_grad->dtype()); if (channel_last) { ResizeToChannelFirst(context, input, @@ -490,7 +490,7 @@ class GemmConvGradKernel : public framework::OpKernel { if (input_grad) { input_grad->mutable_data(context.GetPlace()); - Tensor transformed_input_grad(input_grad->type()); + Tensor transformed_input_grad(input_grad->dtype()); if (channel_last) { ResizeToChannelFirst(context, input_grad, &transformed_input_grad); @@ -616,9 +616,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); // transform Tensor - Tensor transformed_X(X->type()); - Tensor transformed_dY(dY->type()); - Tensor transformed_ddX(X->type()); + Tensor transformed_X(X->dtype()); + Tensor transformed_dY(dY->dtype()); + Tensor transformed_ddX(X->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, X, &transformed_X); @@ -703,7 +703,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape); dX->mutable_data(ctx.GetPlace()); - Tensor transformed_dX(dX->type()); + Tensor transformed_dX(dX->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, dX, &transformed_dX); @@ -794,7 +794,7 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { if (ddY) { ddY->mutable_data(ctx.GetPlace()); - Tensor transformed_ddY(ddY->type()); + Tensor transformed_ddY(ddY->dtype()); if (channel_last) { ResizeToChannelFirst(ctx, ddY, &transformed_ddY); } else { diff --git a/paddle/fluid/operators/conv_op_mlu.cc b/paddle/fluid/operators/conv_op_mlu.cc index 88698c02dd5da..2155de4d05ecb 100644 --- a/paddle/fluid/operators/conv_op_mlu.cc +++ b/paddle/fluid/operators/conv_op_mlu.cc @@ -80,12 +80,14 @@ class MLUConvOpKernel : public framework::OpKernel { true /*need_reshape_or_alloc*/); cnnlTensorLayout_t data_layout = CNNL_LAYOUT_NHWC; - MLUCnnlTensorDesc input_desc(input_tensor, data_layout, - ToCnnlDataType(input_tensor.type())); + MLUCnnlTensorDesc input_desc( + input_tensor, data_layout, + ToCnnlDataType(framework::TransToProtoVarType(input_tensor.dtype()))); MLUCnnlTensorDesc filter_desc(trans_filter, data_layout, ToCnnlDataType(trans_filter.type())); - MLUCnnlTensorDesc output_desc(output_tensor, data_layout, - ToCnnlDataType(output_tensor.type())); + MLUCnnlTensorDesc output_desc( + output_tensor, data_layout, + ToCnnlDataType(framework::TransToProtoVarType(output_tensor.dtype()))); MLUCnnlConvolutionDesc conv_desc(in_dims_size, paddings.data(), strides.data(), dilations.data(), groups, diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index a2e6ff214bfa3..ffb9ffcf98c91 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -104,9 +104,11 @@ class CorrelationOp : public framework::OperatorWithKernel { const 
framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ(input_data_type, ctx.Input("Input2")->type(), - platform::errors::InvalidArgument( - "X and Y shoule have the same datatype")); + PADDLE_ENFORCE_EQ( + input_data_type, + framework::TransToProtoVarType(ctx.Input("Input2")->dtype()), + platform::errors::InvalidArgument( + "X and Y shoule have the same datatype")); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/crop_op_npu.cc b/paddle/fluid/operators/crop_op_npu.cc index 013ad5dd8cb86..782fb10bc5368 100644 --- a/paddle/fluid/operators/crop_op_npu.cc +++ b/paddle/fluid/operators/crop_op_npu.cc @@ -80,7 +80,7 @@ class CropNPUKernel : public framework::OpKernel { "Input(shape) should be equal to the shape of dims " "(%d) of the Input(X).", shape_size.size(), x->dims().size())); - Tensor tmp_shape(x->type()); + Tensor tmp_shape(x->dtype()); tmp_shape.Resize(framework::make_ddim(shape_size)); tmp_shape.mutable_data(ctx.GetPlace()); const auto& runner = diff --git a/paddle/fluid/operators/cumsum_op_npu.cc b/paddle/fluid/operators/cumsum_op_npu.cc index ea0b3085704bc..24b6fb777e9a3 100644 --- a/paddle/fluid/operators/cumsum_op_npu.cc +++ b/paddle/fluid/operators/cumsum_op_npu.cc @@ -27,10 +27,12 @@ static void CumsumImp(const Tensor& input, Tensor* output, auto stream = ctx.template device_context() .stream(); - if (input.type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::INT64) { Tensor tmp_input; tmp_input.mutable_data(input.dims(), ctx.GetPlace()); - auto dst_acl_dtype = ConvertToNpuDtype(tmp_input.type()); + auto dst_acl_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type())); const auto& cast_runner_1 = NpuOpRunner("Cast", {input}, {tmp_input}, {{"dst_type", static_cast(dst_acl_dtype)}}); @@ -42,7 +44,8 @@ static void CumsumImp(const Tensor& input, Tensor* output, NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input); runner.Run(stream); - dst_acl_dtype = ConvertToNpuDtype(output->type()); + dst_acl_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(output->type())); const auto& cast_runner_2 = NpuOpRunner("Cast", {tmp_output}, {*output}, {{"dst_type", static_cast(dst_acl_dtype)}}); diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 5d157a77b3dd1..28a7922120139 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -179,15 +179,18 @@ class DataNormGradKernel PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_size), reinterpret_cast(d_batch_size), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), + ncclSum, comm->comm(), stream)); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_sum), reinterpret_cast(d_batch_sum), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), + ncclSum, comm->comm(), stream)); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( reinterpret_cast(d_batch_square_sum), reinterpret_cast(d_batch_square_sum), C, - platform::ToNCCLDataType(x->type()), ncclSum, comm->comm(), stream)); + platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())), 
+ ncclSum, comm->comm(), stream)); platform::GpuStreamSync(stream); #else PADDLE_THROW(platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/operators/decode_jpeg_op.cc b/paddle/fluid/operators/decode_jpeg_op.cc index dd82c74885b94..d0142244aaeb1 100644 --- a/paddle/fluid/operators/decode_jpeg_op.cc +++ b/paddle/fluid/operators/decode_jpeg_op.cc @@ -73,8 +73,9 @@ class DecodeJpegOp : public framework::OperatorWithKernel { return expected_kernel_type; } - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } }; diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index 18c45a1a4c6c1..7bbbbe7f40ecc 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" @@ -146,7 +147,7 @@ static void AppendProposals(framework::Tensor* dst, int64_t offset, const framework::Tensor& src) { auto* out_data = dst->data(); auto* to_add_data = src.data(); - size_t size_of_t = framework::SizeOfType(src.type()); + size_t size_of_t = framework::DataTypeSize(src.dtype()); offset *= size_of_t; std::memcpy( reinterpret_cast(reinterpret_cast(out_data) + offset), diff --git a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc index a5b2133847c60..e2989bdb31b48 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc @@ -43,7 +43,8 @@ struct DensityPriorBoxFunction { runner.Run(stream); } void Cast(const Tensor* x, Tensor* y) { - auto dst_dtype = ConvertToNpuDtype(y->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(y->type())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); @@ -121,7 +122,7 @@ struct DensityPriorBoxFunction { template <> void DensityPriorBoxFunction::Arange(int n, Tensor* x) { - Tensor x_fp32(framework::proto::VarType::FP32); + Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); @@ -132,7 +133,7 @@ void DensityPriorBoxFunction::Arange(int n, Tensor* x) { template <> void DensityPriorBoxFunction::FloatVec2Tsr(const std::vector& vec, Tensor* tsr_dst) { - Tensor tsr_fp32(framework::proto::VarType::FP32); + Tensor tsr_fp32(experimental::DataType::FLOAT32); tsr_fp32.mutable_data(tsr_dst->dims(), place); framework::TensorFromVector(vec, ctx.device_context(), &tsr_fp32); ctx.template device_context().Wait(); @@ -164,7 +165,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { int layer_w = input->dims()[3]; int layer_h = input->dims()[2]; - auto _type = input->type(); + auto _type = input->dtype(); auto place = ctx.GetPlace(); DensityPriorBoxFunction F(ctx); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc index c76ee58c7fa9e..4709120027c58 100644 --- 
a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc @@ -85,7 +85,7 @@ class IouSimilarityNPUKernel : public framework::OpKernel { bool normalized = ctx.Attr("box_normalized"); auto* out = ctx.Output("Out"); - auto _type = x->type(); + auto _type = x->dtype(); auto place = ctx.GetPlace(); IouFunction F(ctx); diff --git a/paddle/fluid/operators/detection/prior_box_op.cc b/paddle/fluid/operators/detection/prior_box_op.cc index cf19e2411090a..624d3c87cd9b1 100644 --- a/paddle/fluid/operators/detection/prior_box_op.cc +++ b/paddle/fluid/operators/detection/prior_box_op.cc @@ -19,6 +19,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -101,7 +102,8 @@ class PriorBoxOp : public framework::OperatorWithKernel { this->CanMKLDNNBeUsed(ctx, input_input_type)) { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; - auto input_image_type = ctx.Input("Image")->type(); + auto input_image_type = framework::TransToProtoVarType( + ctx.Input("Image")->dtype()); int customized_type_value = framework::OpKernelType::kDefaultCustomizedTypeValue; if (input_image_type == framework::DataTypeTrait::DataType()) { diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h index 94c8513086c20..2b579900267fd 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h @@ -196,7 +196,7 @@ class DlnneEngineOp : public framework::OperatorBase { index++; int64_t data_bytes; int32_t dtype; - auto type = t.type(); + auto type = framework::TransToProtoVarType(t.dtype()); data_bytes = 1; void *buffer = nullptr; if (type == framework::proto::VarType::FP32) { diff --git a/paddle/fluid/operators/dropout_op_npu.cc b/paddle/fluid/operators/dropout_op_npu.cc index 98a38a07dadaa..b5919459f078d 100644 --- a/paddle/fluid/operators/dropout_op_npu.cc +++ b/paddle/fluid/operators/dropout_op_npu.cc @@ -56,8 +56,8 @@ class DropoutNPUKernel : public framework::OpKernel { // only achive the default `upscale_in_train` method if (!is_test) { - Tensor tmp_x(x->type()); - Tensor tmp_out(out->type()); + Tensor tmp_x(x->dtype()); + Tensor tmp_out(out->dtype()); tmp_x.ShareDataWith(*x); tmp_out.ShareDataWith(*out); if (x->dims().size() == 1) { @@ -80,7 +80,7 @@ class DropoutNPUKernel : public framework::OpKernel { seed = ctx.Attr("fix_seed") ? ctx.Attr("seed") : 0; } - Tensor keep_prob_tensor(x->type()); + Tensor keep_prob_tensor(x->dtype()); keep_prob_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&keep_prob_tensor, static_cast(keep_prob)); @@ -89,7 +89,7 @@ class DropoutNPUKernel : public framework::OpKernel { // mask used in `DropOutGenMask` NPU OP is different from // the output `Mask`. 
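The dropout NPU hunk below follows a pattern that recurs across the NPU kernels in this patch: scratch tensors are now constructed with a pten experimental::DataType, and the ACL dtype is derived by first bridging back to proto::VarType through framework::TransToProtoVarType. A minimal sketch of that two-step conversion, assuming the fluid and NPU op-runner headers these kernels already include (the helper name CastToUint8Sketch is illustrative only, not part of the patch):

#include "paddle/fluid/framework/convert_utils.h"
// NpuOpRunner and aclrtStream come from the NPU support headers already
// included by the kernels in this file (paths assumed, not repeated here).

namespace paddle {
namespace operators {

// Illustrative helper: cast `src` into a uint8 scratch tensor on the NPU.
inline framework::Tensor CastToUint8Sketch(const framework::Tensor& src,
                                           const platform::Place& place,
                                           aclrtStream stream) {
  // New style: construct the scratch tensor with a pten DataType ...
  framework::Tensor tmp(experimental::DataType::UINT8);
  tmp.Resize(src.dims());
  tmp.mutable_data(place, tmp.dtype());
  // ... and bridge back to proto::VarType only where a legacy API needs it.
  auto dst_dtype =
      ConvertToNpuDtype(framework::TransToProtoVarType(tmp.dtype()));
  const auto& runner =
      NpuOpRunner("Cast", {src}, {tmp},
                  {{"dst_type", static_cast<int>(dst_dtype)}});
  runner.Run(stream);
  return tmp;
}

}  // namespace operators
}  // namespace paddle
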
- Tensor npu_mask(framework::proto::VarType::UINT8); + Tensor npu_mask(experimental::DataType::UINT8); uint32_t length = (x->numel() + 128 - 1) / 128 * 128; npu_mask.Resize(framework::make_ddim({length / 8})); npu_mask.mutable_data(ctx.GetPlace()); @@ -116,17 +116,19 @@ class DropoutNPUKernel : public framework::OpKernel { runner_dropout.Run(stream); // cast `out` from float/float16 to bool - Tensor cast_mask(framework::proto::VarType::BOOL); + Tensor cast_mask(experimental::DataType::BOOL); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); - auto dst_dtype_bool = ConvertToNpuDtype(cast_mask.type()); + auto dst_dtype_bool = + ConvertToNpuDtype(framework::TransToProtoVarType(cast_mask.dtype())); const auto& runner_cast_mask_bool = NpuOpRunner("Cast", {*out}, {cast_mask}, {{"dst_type", static_cast(dst_dtype_bool)}}); runner_cast_mask_bool.Run(stream); // cast cast_mask from bool to uint8 - auto dst_dtype_uint8 = ConvertToNpuDtype(mask->type()); + auto dst_dtype_uint8 = + ConvertToNpuDtype(framework::TransToProtoVarType(mask->dtype())); const auto& runner_cast_mask_uint8 = NpuOpRunner("Cast", {cast_mask}, {*mask}, {{"dst_type", static_cast(dst_dtype_uint8)}}); @@ -167,10 +169,11 @@ class DropoutGradNPUKernel : public framework::OpKernel { } // cast mask from uint8 to float32/float16 - Tensor cast_mask(dx->type()); + Tensor cast_mask(dx->dtype()); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(dx->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dx->dtype())); const auto& runner_cast_mask = NpuOpRunner("Cast", {*mask}, {cast_mask}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index 4dd5b7cfd8499..b4b6e2ce2fc56 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -189,7 +189,7 @@ class EigKernel : public framework::OpKernel { auto* out_values = context.Output("Eigenvalues"); auto* out_vectors = context.Output("Eigenvectors"); - if (!framework::IsComplexType(x->type())) { + if (!framework::IsComplexType(framework::TransToProtoVarType(x->dtype()))) { out_values->mutable_data(context.GetPlace()); out_vectors->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/eigvals_op.h b/paddle/fluid/operators/eigvals_op.h index 6fdf849ac7613..d825833b02422 100644 --- a/paddle/fluid/operators/eigvals_op.h +++ b/paddle/fluid/operators/eigvals_op.h @@ -110,7 +110,8 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, static_cast(NULL), &info); std::string name = "framework::platform::dynload::dgeev_"; - if (input.type() == framework::proto::VarType::FP64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::FP64) { name = "framework::platform::dynload::sgeev_"; } CheckLapackEigResult(info, name); @@ -159,7 +160,8 @@ LapackEigvals(const framework::ExecutionContext& ctx, const Tensor& input, rwork->template data>(), &info); std::string name = "framework::platform::dynload::cgeev_"; - if (input.type() == framework::proto::VarType::COMPLEX64) { + if (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::COMPLEX64) { name = "framework::platform::dynload::zgeev_"; } CheckLapackEigResult(info, name); @@ -203,7 +205,8 @@ class EigvalsKernel : public framework::OpKernel { lwork = 3 * n_dim; work.mutable_data(framework::make_ddim({lwork}), ctx.GetPlace()); } - if (framework::IsComplexType(input->type())) { 
+ if (framework::IsComplexType( + framework::TransToProtoVarType(input->dtype()))) { rwork.mutable_data>(framework::make_ddim({n_dim << 1}), ctx.GetPlace()); } diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index 791e548372fd1..493dcf72923f3 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -196,8 +196,9 @@ class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc index a697d9e007c0c..fb9dbc7fc825c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc @@ -85,13 +85,13 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {}); runner_tensor_zeros.Run(stream); - Tensor x_zero(paddle::framework::proto::VarType::BOOL); + Tensor x_zero(experimental::DataType::BOOL); x_zero.mutable_data(x->dims(), place); const auto& runner_x_zero = NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {}); runner_x_zero.Run(stream); - Tensor x_nozero(paddle::framework::proto::VarType::BOOL); + Tensor x_nozero(experimental::DataType::BOOL); x_nozero.mutable_data(x->dims(), place); const auto& runner_x_nonzero = NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {}); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 40faf7cbbe8cd..1262562ad521f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -48,8 +48,9 @@ class ElementwiseMulOp : public ElementwiseOp { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index 0c04f7b360e30..cdd1a9b976bd6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -158,8 +158,9 @@ class ElementwiseOp : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + 
framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { #ifdef PADDLE_WITH_MKLDNN // When elementwise is first oneDNN op (there was some non oneDNN op @@ -346,8 +347,9 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -396,8 +398,9 @@ class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -449,8 +452,9 @@ class ElementwiseOpDoubleGradWithoutDXDY const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -506,8 +510,9 @@ class ElementwiseOpTripleGrad : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc index 6c51df5c61ef3..9305f42021192 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
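Every elementwise operator touched above overrides GetKernelTypeForVar with the same promotion rule; only the way the proto data type is obtained changes. A condensed sketch of the post-migration form (written as a free function purely for illustration; in the patch it is a method of each OperatorWithKernel subclass):

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

// Promote to the input's own type when the expected kernel type is complex;
// the proto enum now comes from the pten dtype instead of tensor.type().
framework::OpKernelType GetKernelTypeForVarSketch(
    const framework::Tensor& tensor,
    const framework::OpKernelType& expected_kernel_type) {
  if (framework::IsComplexType(expected_kernel_type.data_type_)) {
    return framework::OpKernelType(
        framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
        tensor.layout());
  }
  return framework::OpKernelType(expected_kernel_type.data_type_,
                                 tensor.place(), tensor.layout());
}

}  // namespace operators
}  // namespace paddle
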
+#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" namespace paddle { @@ -41,9 +42,11 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel { auto* dy = ctx.Output(framework::GradVarName("Y")); auto tz = paddle::framework::vectorize(dout->dims()); - memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, - onednn_engine); + memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler handler( + tz, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto reorder_src_memory_p = handler.AcquireSrcMemory( diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc index e7bb73340b841..642ee1feb7a5d 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_sub_mkldnn_op.cc @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" namespace paddle { namespace framework { @@ -41,9 +42,11 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel { auto* dy = ctx.Output(framework::GradVarName("Y")); auto tz = framework::vectorize(dout->dims()); - memory::data_type dout_type = framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, - onednn_engine); + memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler handler( + tz, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); auto reorder_src_memory_p = handler.AcquireSrcMemory( diff --git a/paddle/fluid/operators/empty_op.h b/paddle/fluid/operators/empty_op.h index 9c91377683870..42c951385a438 100644 --- a/paddle/fluid/operators/empty_op.h +++ b/paddle/fluid/operators/empty_op.h @@ -17,6 +17,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" @@ -37,7 +38,8 @@ class EmptyKernel : public framework::OpKernel { auto shape = GetShape(context); out_tensor->Resize(shape); - out_tensor->mutable_data(context.GetPlace(), dtype); + out_tensor->mutable_data(context.GetPlace(), + framework::TransToPtenDataType(dtype)); } }; diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index 58418d01b81da..d2f8daf732050 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -116,11 +116,13 @@ class ExpandV2NPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); }; - if (X->type() == framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(X->dtype()) == + framework::proto::VarType::BOOL) { NpuOpRunner::TypeAdapter({*X}, {*Out}, attr_input, dev_ctx, op_func, {framework::proto::VarType::UINT8}, {framework::proto::VarType::UINT8}); - } else if (X->type() == framework::proto::VarType::INT64) { + 
} else if (framework::TransToProtoVarType(X->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter({*X}, {*Out}, attr_input, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); @@ -151,8 +153,8 @@ class ExpandV2NPUGradKernel : public framework::OpKernel { axes.push_back(i); } - Tensor tmp_dout(dout->type()); - Tensor reduced_dout(dx->type()); + Tensor tmp_dout(dout->dtype()); + Tensor reduced_dout(dx->dtype()); tmp_dout.ShareDataWith(*dout); if (axes.size() != 0) { std::vector reduced_dout_dims; diff --git a/paddle/fluid/operators/fill_any_like_op_npu.cc b/paddle/fluid/operators/fill_any_like_op_npu.cc index fc52d2aed2cce..bf00e7104223d 100644 --- a/paddle/fluid/operators/fill_any_like_op_npu.cc +++ b/paddle/fluid/operators/fill_any_like_op_npu.cc @@ -54,7 +54,7 @@ class FillAnyLikeNPUKernel : public framework::OpKernel { std::isnan(value), false, platform::errors::InvalidArgument("The filled value is NaN.")); - Tensor tensor_tmp(data_type); + Tensor tensor_tmp(framework::TransToPtenDataType(data_type)); tensor_tmp.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, static_cast(value)); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.h b/paddle/fluid/operators/fill_constant_batch_size_like_op.h index ed3a6618977f5..8e4aa9fdd69ce 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.h +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/pten/kernels/funcs/math_function.h" @@ -61,7 +62,8 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); pten::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), data_type); + out->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } @@ -69,7 +71,8 @@ class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel { if (!cpu_place) { auto &dev_ctx = *pool.Get(ctx.GetPlace()); pten::funcs::SetConstant functor; - out->mutable_data(ctx.GetPlace(), data_type); + out->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 98e03ea66d852..a65f2708950f0 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -71,12 +71,14 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { if (cpu_place) { auto &dev_ctx = *pool.Get(platform::CPUPlace()); pten::funcs::SetConstant functor; - out->mutable_data(platform::CPUPlace(), data_type); + out->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); functor(reinterpret_cast(dev_ctx), out, static_cast(value)); } else { - out->mutable_data(ctx.GetPlace(), data_type); - Tensor tensor_tmp(data_type); + out->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); + Tensor tensor_tmp(framework::TransToPtenDataType(data_type)); tensor_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, value); diff --git 
a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index 15c9241275d10..90aa5aabb82da 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" @@ -120,14 +121,16 @@ class FillConstantKernel : public framework::OpKernel { VLOG(4) << "[CPU] FillConstantKernel" << ((data_type == framework::proto::VarType::BF16) ? "" : ""); - tensor->mutable_data(platform::CPUPlace(), data_type); + tensor->mutable_data(platform::CPUPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CPUPlace()); functor(reinterpret_cast(dev_ctx), tensor, static_cast(value)); } else if (actual_place == 1) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - tensor->mutable_data(ctx.GetPlace(), data_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), @@ -138,7 +141,8 @@ class FillConstantKernel : public framework::OpKernel { #endif } else if (actual_place == 2) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - tensor->mutable_data(platform::CUDAPinnedPlace(), data_type); + tensor->mutable_data(platform::CUDAPinnedPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(platform::CUDAPinnedPlace()); functor( @@ -150,7 +154,8 @@ class FillConstantKernel : public framework::OpKernel { #endif } else if (actual_place == 3) { #ifdef PADDLE_WITH_XPU - tensor->mutable_data(ctx.GetPlace(), data_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(data_type)); pten::funcs::SetConstant functor; auto &dev_ctx = *pool.Get(ctx.GetPlace()); functor(reinterpret_cast(dev_ctx), diff --git a/paddle/fluid/operators/fill_constant_op_mlu.cc b/paddle/fluid/operators/fill_constant_op_mlu.cc index c265977715ff1..609af914d4df6 100644 --- a/paddle/fluid/operators/fill_constant_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_op_mlu.cc @@ -72,8 +72,9 @@ class FillConstantMLUKernel : public framework::OpKernel { auto shape = GetShape(ctx); out_var->mutable_data(shape, ctx.GetPlace()); - MLUCnnlTensorDesc output_desc(*out_var, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out_var->type())); + MLUCnnlTensorDesc output_desc( + *out_var, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out_var->dtype()))); MLUCnnl::Fill(ctx, value, output_desc.get(), GetBasePtr(out_var)); } }; diff --git a/paddle/fluid/operators/fill_constant_op_npu.cc b/paddle/fluid/operators/fill_constant_op_npu.cc index d7bd25683ec06..2d02faafead26 100644 --- a/paddle/fluid/operators/fill_constant_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_op_npu.cc @@ -61,7 +61,7 @@ class FillConstantNPUKernel : public framework::OpKernel { out_var->mutable_data(shape, ctx.GetPlace()); if (data_type != framework::proto::VarType::BOOL) { - Tensor tensor_value(data_type); + Tensor tensor_value(framework::TransToPtenDataType(data_type)); tensor_value.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_value, value); NpuOpRunner runner; diff --git a/paddle/fluid/operators/fill_diagonal_op.cc 
b/paddle/fluid/operators/fill_diagonal_op.cc index be3239d504844..edc5fdd6f991e 100644 --- a/paddle/fluid/operators/fill_diagonal_op.cc +++ b/paddle/fluid/operators/fill_diagonal_op.cc @@ -138,8 +138,8 @@ class FillIDiagonalGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { // Note: don't get data type from ctx.Input("Input"); - auto dtype = - ctx.Input(framework::GradVarName("Out"))->type(); + auto dtype = framework::TransToProtoVarType( + ctx.Input(framework::GradVarName("Out"))->type()); return framework::OpKernelType(dtype, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fill_diagonal_tensor_op.cc b/paddle/fluid/operators/fill_diagonal_tensor_op.cc index 0c348693202e1..b4ab7c22c861c 100644 --- a/paddle/fluid/operators/fill_diagonal_tensor_op.cc +++ b/paddle/fluid/operators/fill_diagonal_tensor_op.cc @@ -187,7 +187,8 @@ class FillDiagonalTensorGradOp : public framework::OperatorWithKernel { // Note: don't get data type from ctx.Input("Input"); auto dtype = ctx.Input(framework::GradVarName("Out"))->type(); - return framework::OpKernelType(dtype, ctx.GetPlace()); + return framework::OpKernelType(framework::TransToProtoVarType(dtype), + ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fill_op.h b/paddle/fluid/operators/fill_op.h index 02e388bcb40f8..8cf4210da4761 100644 --- a/paddle/fluid/operators/fill_op.h +++ b/paddle/fluid/operators/fill_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" @@ -48,9 +49,10 @@ class FillKernel : public framework::OpKernel { out.Resize(framework::make_ddim(ctx.Attr>("shape"))); auto dtype = static_cast(ctx.Attr("dtype")); + auto pten_dtype = framework::TransToPtenDataType(dtype); platform::CPUPlace cpu; auto force_cpu = ctx.Attr("force_cpu"); - out.mutable_data(force_cpu ? cpu : ctx.GetPlace(), dtype); + out.mutable_data(force_cpu ? cpu : ctx.GetPlace(), pten_dtype); framework::LoDTensor tensor; @@ -59,7 +61,7 @@ class FillKernel : public framework::OpKernel { } else { // Always make tensor in CPU memory. 
tensor.Resize(out.dims()); - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, pten_dtype); } framework::VisitDataType( diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index 38326e7560c0d..90a50497f78aa 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -66,8 +66,8 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { const std::string padding_algorithm = ctx.Attr("padding_algorithm"); - Tensor transformed_input_channel(input->type()); - Tensor transformed_output(output->type()); + Tensor transformed_input_channel(input->dtype()); + Tensor transformed_output(output->dtype()); transformed_input_channel = *input; transformed_output = *output; T* output_data = transformed_output.data(); diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 39c7e52cc465c..d141800d61c0e 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -163,7 +163,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; @@ -453,7 +453,7 @@ class FusedAttentionGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index e9ad2895e03db..072eb52a8aa4f 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -139,16 +140,20 @@ framework::OpKernelType FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::PreconditionNotMet( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::PreconditionNotMet( "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Mean")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), platform::errors::PreconditionNotMet( "Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), platform::errors::PreconditionNotMet( "Variance input should be of float type")); diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index e825ad30782ad..40b31559fe112 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -166,9 +166,9 @@ class FusedBatchNormActKernel /*xDesc=*/data_desc_, /*sizeInBytes=*/&reserve_space_size)); - reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->dtype(), reserve_space_size); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationForwardTrainingEx( diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index 9f446b48b4728..b5870c6d906ce 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -120,10 +120,12 @@ framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( auto bn_param_type = framework::proto::VarType::FP32; PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Scale")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( - bn_param_type, ctx.Input("Bias")->type(), + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index c5bc5b1725516..12768fa83a7df 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -144,9 +144,9 @@ class FusedBatchNormAddActKernel /*xDesc=*/data_desc_, /*sizeInBytes=*/&reserve_space_size)); - reserve_space_ptr = 
reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->dtype(), reserve_space_size); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationForwardTrainingEx( @@ -277,7 +277,7 @@ class FusedBatchNormAddActGradKernel /*activationDesc=*/activation_desc_, /*sizeInBytes=*/&workspace_size)); - workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->dtype(), workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::cudnnBatchNormalizationBackwardEx( diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index d51e0de38009b..3e69bf0806756 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -159,8 +159,8 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.Input("X")->type(), - ctx.Input("Y")->type(), + PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), + ctx.Input("Y")->dtype(), platform::errors::InvalidArgument( "The element's type of input should be the same.")); return framework::OpKernelType( diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc index 4d270280d389c..a72ab1d1b144c 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc @@ -97,7 +97,7 @@ class EmbeddingEltWiseLayerNormOp : public framework::OperatorWithKernel { bool flag = 0; for (auto* input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu index 14a6608836a8a..c2c617b8d5238 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu @@ -14,6 +14,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/math/bert_encoder_functor.h" @@ -32,8 +33,10 @@ class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel { auto embs = context.MultiInput("Embs"); int input_num = static_cast(ids.size()); - framework::Tensor in_ids_(framework::proto::VarType::INT64), - in_embs_(framework::proto::VarType::INT64); + framework::Tensor in_ids_( + framework::TransToPtenDataType(framework::proto::VarType::INT64)), + in_embs_( + framework::TransToPtenDataType(framework::proto::VarType::INT64)); framework::DDim in_dim{input_num}; int device_id; #ifdef PADDLE_WITH_HIP diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index 7da790fc5c6e2..1815a4dd71c78 100644 --- 
a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -289,7 +289,7 @@ class FusedFeedForwardOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { auto input = ctx.Input("X"); - auto input_data_type = input->type(); + auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index f1deab3e65299..c18eee7912723 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -277,13 +278,14 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { std::shared_ptr h0_memory_p, weight_h_memory_p, weight_x_memory_p; - if (weight_h->type() == paddle::framework::proto::VarType_Type_FP32) { + if (framework::TransToProtoVarType(weight_h->dtype()) == + paddle::framework::proto::VarType_Type_FP32) { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x, origin_mode); weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h, origin_mode); - } else if (weight_h->type() == + } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { h0_memory_p = handler.template AcquireH0Memory(h0); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index dfd88248ede34..f3317a1d49554 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/operators/fused/fusion_lstm_op.h" #include "paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h" @@ -114,7 +115,7 @@ class LSTMMKLDNNHandler void ReorderGates(U* weights, int64_t I) { size_t inner_block_size = this->OC; size_t block_size = inner_block_size * this->G; - for (size_t i = 0; i < (size_t)I; ++i) { + for (size_t i = 0; i < (size_t)I; ++i) { // NOLINT size_t offset = i * block_size; U* base_pos = weights + offset; @@ -341,13 +342,14 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { std::shared_ptr h0_memory_p, weight_h_memory_p, weight_x_memory_p; - if (weight_h->type() == paddle::framework::proto::VarType_Type_FP32) { + if (framework::TransToProtoVarType(weight_h->dtype()) == + paddle::framework::proto::VarType_Type_FP32) { h0_memory_p = handler.template AcquireH0Memory(h0); weight_x_memory_p = handler.template AcquireWeightXMemory(weight_x); weight_h_memory_p = handler.template AcquireWeightHMemory(weight_h); - } else if (weight_h->type() == + } else if (framework::TransToProtoVarType(weight_h->dtype()) == paddle::framework::proto::VarType_Type_BF16) { h0_memory_p = handler.template AcquireH0Memory(h0); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 79e813c52181c..debab26aa6937 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -180,10 +180,12 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("ScaleX")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("ScaleX")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("BiasX")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("BiasX")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/gather_nd_op.cu b/paddle/fluid/operators/gather_nd_op.cu index ee51df68c18e5..3a8099c0672ad 100644 --- a/paddle/fluid/operators/gather_nd_op.cu +++ b/paddle/fluid/operators/gather_nd_op.cu @@ -33,7 +33,7 @@ class GatherNdOpCUDAKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -71,7 +71,7 @@ class GatherNdGradOpCUDAKernel : public framework::OpKernel { dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/gather_nd_op.h b/paddle/fluid/operators/gather_nd_op.h index e89db1e8732e2..f458c0e18013b 100644 --- a/paddle/fluid/operators/gather_nd_op.h +++ b/paddle/fluid/operators/gather_nd_op.h @@ 
-38,7 +38,7 @@ class GatherNdOpKernel : public framework::OpKernel { output->mutable_data(ctx.GetPlace()); if (x->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -75,7 +75,7 @@ class GatherNdGradOpKernel : public framework::OpKernel { dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_nd_op_npu.cc b/paddle/fluid/operators/gather_nd_op_npu.cc index 054ccb6e59bab..dd6dcbb8d5b59 100644 --- a/paddle/fluid/operators/gather_nd_op_npu.cc +++ b/paddle/fluid/operators/gather_nd_op_npu.cc @@ -38,7 +38,7 @@ class GatherNdNPUKernel : public framework::OpKernel { return; } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_nd_op_xpu.cc b/paddle/fluid/operators/gather_nd_op_xpu.cc index a86731bba8ab8..0c67ec3f38ef2 100644 --- a/paddle/fluid/operators/gather_nd_op_xpu.cc +++ b/paddle/fluid/operators/gather_nd_op_xpu.cc @@ -32,7 +32,7 @@ class GatherNdXPUKernel : public framework::OpKernel { return; } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/gather_op.cu b/paddle/fluid/operators/gather_op.cu index 6e27d95e01855..19568835a6e96 100644 --- a/paddle/fluid/operators/gather_op.cu +++ b/paddle/fluid/operators/gather_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/gather.cu.h" #include "paddle/fluid/operators/gather_op.h" @@ -38,7 +39,8 @@ class GatherOpCUDAKernel : public framework::OpKernel { Tensor cpu_axis; const Tensor *axis_tensor = ctx.Input("Axis"); framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -46,7 +48,7 @@ class GatherOpCUDAKernel : public framework::OpKernel { } } const auto &place = ctx.GetPlace(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2CUDAFunction(x, index, axis, output, place, ctx); @@ -82,7 +84,8 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { const Tensor *axis_tensor = ctx.Input("Axis"); Tensor cpu_axis; framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -90,7 +93,7 @@ class GatherGradOpCUDAKernel : public framework::OpKernel { } } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2GradCUDAFunction(dO, index, axis, dX, diff --git a/paddle/fluid/operators/gather_op.h b/paddle/fluid/operators/gather_op.h index a2570c3e014e1..016c2b398daaa 100644 --- a/paddle/fluid/operators/gather_op.h +++ b/paddle/fluid/operators/gather_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/gather.h" @@ -39,7 +40,8 @@ class GatherOpKernel : public framework::OpKernel { // get axis from tensor if (ctx.HasInput("Axis")) { const Tensor *axis_tensor = ctx.Input("Axis"); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(axis_tensor->data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { @@ -47,7 +49,7 @@ class GatherOpKernel : public framework::OpKernel { } } const auto &place = ctx.GetPlace(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { GatherV2Function(x, index, axis, output, place); @@ -82,14 +84,15 @@ class GatherGradientOpKernel : public framework::OpKernel { int axis = ctx.Attr("axis"); if (ctx.HasInput("Axis")) { const Tensor *axis_tensor = ctx.Input("Axis"); - const auto &axis_type = axis_tensor->type(); + const auto &axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(axis_tensor->data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { axis = static_cast(axis_tensor->data()[0]); } } - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (axis != 0) { if (index_type == framework::proto::VarType::INT32) { diff --git a/paddle/fluid/operators/gather_op_xpu.cc b/paddle/fluid/operators/gather_op_xpu.cc index d9fdbb2a9dd75..99e4b84bc44ec 100644 --- a/paddle/fluid/operators/gather_op_xpu.cc +++ b/paddle/fluid/operators/gather_op_xpu.cc @@ -64,7 +64,8 @@ class GatherOpXPUKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); int r = XPU_SUCCESS; - if (index->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) == + framework::proto::VarType::INT32) { r = xpu::gather( dev_ctx.x_context(), reinterpret_cast(x->data()), index->data(), reinterpret_cast(output->data()), @@ -129,7 +130,8 @@ class GatherGradOpXPUKernel : public framework::OpKernel { dx->mutable_data(ctx.GetPlace()); int r = XPU_SUCCESS; - if (index->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) == + framework::proto::VarType::INT32) { r = xpu::gather_grad( dev_ctx.x_context(), reinterpret_cast(dout->data()), diff --git a/paddle/fluid/operators/gaussian_random_op_npu.cc b/paddle/fluid/operators/gaussian_random_op_npu.cc old mode 100755 new mode 100644 index b5ca26edf8fae..0a64db1823d98 --- a/paddle/fluid/operators/gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/gaussian_random_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -35,7 +36,7 @@ class NPUGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::normal_distribution dist(mean, std); diff --git a/paddle/fluid/operators/graph_send_recv_op.cu b/paddle/fluid/operators/graph_send_recv_op.cu index 446ad2d97a7fb..f43d31814ac38 100644 --- a/paddle/fluid/operators/graph_send_recv_op.cu +++ b/paddle/fluid/operators/graph_send_recv_op.cu @@ -360,7 +360,7 @@ class GraphSendRecvOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Src_index"); auto* dst_index = ctx.Input("Dst_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvOpCUDAKernelLaunchHelper( @@ -383,7 +383,7 @@ class GraphSendRecvGradOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Dst_index"); auto* dst_index = ctx.Input("Src_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvGradOpCUDAKernelLaunchHelper( diff --git a/paddle/fluid/operators/graph_send_recv_op.h b/paddle/fluid/operators/graph_send_recv_op.h index 1c7ea74be2ff4..8d8111e0ee845 100644 --- a/paddle/fluid/operators/graph_send_recv_op.h +++ b/paddle/fluid/operators/graph_send_recv_op.h @@ -249,7 +249,7 @@ class GraphSendRecvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Src_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvOpKernelLaunchHelper(ctx, *src_index); @@ -270,7 +270,7 @@ class GraphSendRecvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* src_index = ctx.Input("Dst_index"); - auto index_type = src_index->type(); + auto index_type = framework::TransToProtoVarType(src_index->dtype()); if (index_type == framework::proto::VarType::INT32) { GraphSendRecvGradOpKernelLaunchHelper(ctx, diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index 2c7fd8f4173ea..2d284fb516e62 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -204,7 +204,8 @@ class GroupNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL( t, platform::errors::InvalidArgument( "Input(Y@GRAD) Tensor of GroupNormGradOp should not be null")); - return framework::OpKernelType(t->type(), ctx.GetPlace()); + return framework::OpKernelType(framework::TransToProtoVarType(t->dtype()), + ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/increment_op_npu.cc b/paddle/fluid/operators/increment_op_npu.cc index c88dd1a16b657..1c7c8a19110bc 100644 --- 
a/paddle/fluid/operators/increment_op_npu.cc +++ b/paddle/fluid/operators/increment_op_npu.cc @@ -27,7 +27,7 @@ class IncrementalNPUKernel : public framework::OpKernel { float step = context.Attr("step"); out_tensor->mutable_data(context.GetPlace()); - Tensor step_tensor(x_tensor->type()); + Tensor step_tensor(x_tensor->dtype()); step_tensor.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&step_tensor, static_cast(step)); diff --git a/paddle/fluid/operators/index_sample_op.cu b/paddle/fluid/operators/index_sample_op.cu index e145c555dc552..5e2ab922850ff 100644 --- a/paddle/fluid/operators/index_sample_op.cu +++ b/paddle/fluid/operators/index_sample_op.cu @@ -85,7 +85,7 @@ class IndexSampleKernel auto* index = ctx.Input("Index"); auto* output = ctx.Output("Out"); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -144,7 +144,7 @@ class IndexSampleGradKernel const auto* output_grad_data = output_grad->data(); auto* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_sample_op.h b/paddle/fluid/operators/index_sample_op.h index ab03c17fd1a6f..a92b205f3f761 100644 --- a/paddle/fluid/operators/index_sample_op.h +++ b/paddle/fluid/operators/index_sample_op.h @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include #include #include "gflags/gflags.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -93,7 +94,8 @@ class IndexSampleKernel : public framework::OpKernel { auto *out_var = ctx.OutputVar("Out"); auto *out_tensor = out_var->GetMutable(); - const auto &index_type = index_tensor.type(); + const auto &index_type = + framework::TransToProtoVarType(index_tensor.dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -169,7 +171,8 @@ class IndexSampleGradKernel : public framework::OpKernel { auto &out_grad_tensor = out_grad_var->Get(); auto *x_grad_tensor = x_grad_var->GetMutable(); - const auto &index_type = index_tensor.type(); + const auto &index_type = + framework::TransToProtoVarType(index_tensor.dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc index 9253f77345dcb..193a27e07e58d 100644 --- a/paddle/fluid/operators/index_sample_op_npu.cc +++ b/paddle/fluid/operators/index_sample_op_npu.cc @@ -60,7 +60,7 @@ class IndexSampleNPUKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { IndexSampleGather(dev_ctx, index, input, out); } else { @@ -113,7 +113,7 @@ class IndexSampleGradNPUKernel : public framework::OpKernel { ctx.Output(framework::GradVarName("X")); x_grad->mutable_data(ctx.GetPlace()); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { IndexSampleGradScatter(dev_ctx, index, out_grad, x_grad); } else { diff --git a/paddle/fluid/operators/index_select_op.cu b/paddle/fluid/operators/index_select_op.cu index acf959896f949..ad0f13e9c2c47 100644 --- a/paddle/fluid/operators/index_select_op.cu +++ b/paddle/fluid/operators/index_select_op.cu @@ -84,7 +84,7 @@ class IndexSelectCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -142,7 +142,7 @@ class IndexSelectGradCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index b157f775d50eb..5fee9bfd8547c 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -103,7 +103,7 @@ class IndexSelectKernel : public framework::OpKernel { if (dim < 0) { dim += inputs.dims().size(); 
} - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -211,7 +211,7 @@ class IndexSelectGradKernel : public framework::OpKernel { if (dim < 0) { dim += out_grad->dims().size(); } - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index 617bf242345c4..bce7a3c1caae3 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -65,7 +65,8 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { } Tensor casted_index; - if (index->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(index->dtype()) != + framework::proto::VarType::INT32) { casted_index.mutable_data(index->dims(), ctx.GetPlace()); const auto& cast_runner = NpuOpRunner("Cast", {*index}, {casted_index}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 7a112292c8fc5..e0779249c41ad 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -36,18 +36,23 @@ class InplaceABNOp : public paddle::operators::BatchNormOp { if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Mean")->type(), + PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument( "Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, ctx.Input("Variance")->type(), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), + platform::errors::InvalidArgument( + "Variance input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; @@ -117,7 +122,8 @@ class InplaceABNGradOp : public paddle::operators::BatchNormGradOp { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { const auto* var = ctx.InputVar(framework::GradVarName("Y")); - auto input_data_type = ctx.Input("Y")->type(); + auto input_data_type = + framework::TransToProtoVarType(ctx.Input("Y")->dtype()); if (var == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( "can't find gradient variable of Y")); diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index 8c650c6437632..5d1fb69811829 100644 
--- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -115,12 +115,14 @@ framework::OpKernelType InstanceNormOp::GetExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(in_param_type, ctx.Input("Scale")->type(), + PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument( "Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(in_param_type, ctx.Input("Bias")->type(), + PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument( "Bias input should be of float type")); } diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc index 8c76961cd6a3c..bf29c2aabb801 100644 --- a/paddle/fluid/operators/interpolate_v2_op_npu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc @@ -65,7 +65,8 @@ struct InterpolateFunction { runner.Run(stream); } void Cast(const Tensor* x, Tensor* y) { - auto dst_dtype = ConvertToNpuDtype(y->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(y->dtype())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); @@ -146,7 +147,7 @@ struct InterpolateFunction { template <> void InterpolateFunction::Arange(int n, Tensor* x) { - Tensor x_fp32(framework::proto::VarType::FP32); + Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); diff --git a/paddle/fluid/operators/ipu/ipu_runtime_op.cc b/paddle/fluid/operators/ipu/ipu_runtime_op.cc index 3b6982d4b2b8e..8cbf1a018a570 100644 --- a/paddle/fluid/operators/ipu/ipu_runtime_op.cc +++ b/paddle/fluid/operators/ipu/ipu_runtime_op.cc @@ -46,7 +46,7 @@ class IpuRuntimeOp : public framework::OperatorBase { for (size_t i = 0; i < outputs.size(); ++i) { auto* out = outputs[i]; if (out->dims().size() == 0) { - auto tensor_dtype = out->type(); + auto tensor_dtype = framework::TransToProtoVarType(out->dtype()); auto sizeof_dtype = framework::SizeOfType(tensor_dtype); int64_t dim = out->memory_size() / sizeof_dtype; out->Resize({dim}); diff --git a/paddle/fluid/operators/isclose_op.h b/paddle/fluid/operators/isclose_op.h index 7f5052c1e6655..cde5d2afbf009 100644 --- a/paddle/fluid/operators/isclose_op.h +++ b/paddle/fluid/operators/isclose_op.h @@ -59,10 +59,13 @@ class IscloseKernel : public framework::OpKernel { rtol->numel(), 1, platform::errors::InvalidArgument( "Input(Rtol) size must be 1, but get %d.", rtol->numel())); - PADDLE_ENFORCE_EQ(rtol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Rtol) type must be double, but get %s.", - framework::DataTypeToString(rtol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(rtol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Rtol) type must be double, but get %s.", + framework::DataTypeToString( + framework::TransToProtoVarType(rtol->dtype())))); rtol_v = get_tensor_value(dev_ctx, *rtol); } if (ctx.HasInput("Atol")) { @@ -71,10 +74,13 @@ class IscloseKernel : public framework::OpKernel { atol->numel(), 1, platform::errors::InvalidArgument( "Input(Atol) size must be 1, but get %d", atol->numel())); - 
PADDLE_ENFORCE_EQ(atol->type(), framework::proto::VarType::FP64, - platform::errors::InvalidArgument( - "Input(Atol) type must be double, but get %s", - framework::DataTypeToString(atol->type()))); + PADDLE_ENFORCE_EQ( + framework::TransToProtoVarType(atol->dtype()), + framework::proto::VarType::FP64, + platform::errors::InvalidArgument( + "Input(Atol) type must be double, but get %s", + framework::DataTypeToString( + framework::TransToProtoVarType(atol->dtype())))); atol_v = get_tensor_value(dev_ctx, *atol); } diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 7fdb0599ebfd1..247fcb6814f24 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -54,9 +54,11 @@ class OverflowOp : public framework::OperatorWithKernel { int dtype = -1; auto *x_var = ctx.InputVar("X"); if (x_var->IsType()) { - dtype = x_var->Get().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().type()); } else if (x_var->IsType()) { - dtype = x_var->Get().value().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().value().type()); } else { PADDLE_ENFORCE_EQ( true, false, diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc index 05a394e682d67..1085bcd72707c 100644 --- a/paddle/fluid/operators/isfinite_v2_op.cc +++ b/paddle/fluid/operators/isfinite_v2_op.cc @@ -61,9 +61,11 @@ class OverflowV2Op : public framework::OperatorWithKernel { int dtype = -1; auto *x_var = ctx.InputVar("X"); if (x_var->IsType()) { - dtype = x_var->Get().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().dtype()); } else if (x_var->IsType()) { - dtype = x_var->Get().value().type(); + dtype = framework::TransToProtoVarType( + x_var->Get().value().dtype()); } else { PADDLE_THROW(plat::errors::InvalidArgument( "Cannot find the input data type by all input data")); diff --git a/paddle/fluid/operators/kron_op.cc b/paddle/fluid/operators/kron_op.cc index 308330313a976..3168c0c43b640 100644 --- a/paddle/fluid/operators/kron_op.cc +++ b/paddle/fluid/operators/kron_op.cc @@ -62,8 +62,9 @@ class KronOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -139,8 +140,9 @@ class KronGradOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/layer_norm_op.cu b/paddle/fluid/operators/layer_norm_op.cu index ef4f0c6ba7063..38b4b1b24b69f 100644 --- a/paddle/fluid/operators/layer_norm_op.cu +++ b/paddle/fluid/operators/layer_norm_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" #include "paddle/fluid/operators/layer_norm_op.h" #include "paddle/fluid/platform/float16.h" @@ -67,18 +68,22 @@ class LayerNormKernel auto *void_scale_data = (scale == nullptr ? nullptr : scale->data()); auto *void_bias_data = (bias == nullptr ? nullptr : bias->data()); - framework::proto::VarType::Type x_dtype = x->type(); + framework::proto::VarType::Type x_dtype = + framework::TransToProtoVarType(x->dtype()); framework::proto::VarType::Type scale_bias_dtype; if (void_scale_data != nullptr) { - scale_bias_dtype = scale->type(); + scale_bias_dtype = framework::TransToProtoVarType(scale->dtype()); if (void_bias_data != nullptr) { - PADDLE_ENFORCE_EQ(scale_bias_dtype, bias->type(), + PADDLE_ENFORCE_EQ(scale_bias_dtype, + framework::TransToProtoVarType(bias->dtype()), platform::errors::InvalidArgument( "Thie Scale and Bias of layer_norm op " "should have the same data type.")); } } else { - scale_bias_dtype = (void_bias_data != nullptr ? bias->type() : x_dtype); + scale_bias_dtype = (void_bias_data != nullptr + ? framework::TransToProtoVarType(bias->dtype()) + : x_dtype); } bool is_scale_bias_same_dtype_with_x = x_dtype == scale_bias_dtype; @@ -193,16 +198,17 @@ class LayerNormGradKernel auto *d_x_data = (d_x == nullptr ? nullptr : d_x->mutable_data(ctx.GetPlace())); - framework::proto::VarType::Type x_dtype = x->type(); + framework::proto::VarType::Type x_dtype = + framework::TransToProtoVarType(x->dtype()); framework::proto::VarType::Type scale_bias_dtype; if (scale != nullptr) { - scale_bias_dtype = scale->type(); + scale_bias_dtype = framework::TransToProtoVarType(scale->dtype()); } else { // FIXME(zengjinle): do not find a better way to get the right // data type of the d_scale and d_bias if scale == nullptr. 
auto *bias = ctx.Input("Bias"); if (bias != nullptr) { - scale_bias_dtype = bias->saved_type(); + scale_bias_dtype = framework::TransToProtoVarType(bias->dtype()); } else { scale_bias_dtype = x_dtype; } diff --git a/paddle/fluid/operators/layer_norm_op_npu.cc b/paddle/fluid/operators/layer_norm_op_npu.cc index 8cf64b0f8cf6d..13243778cd3e7 100644 --- a/paddle/fluid/operators/layer_norm_op_npu.cc +++ b/paddle/fluid/operators/layer_norm_op_npu.cc @@ -105,11 +105,14 @@ class LayerNormNPUKernel : public framework::OpKernel { // cast scale from LayerNormParamType to T if needed Tensor cast_scale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - scale->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32) { cast_scale.Resize(scale->dims()); cast_scale.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_scale = NpuOpRunner("Cast", {*scale}, {cast_scale}, {{"dst_type", static_cast(dst_dtype)}}); @@ -120,11 +123,14 @@ class LayerNormNPUKernel : public framework::OpKernel { // cast bias from LayerNormParamType to T if needed Tensor cast_bias(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - bias->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32) { cast_bias.Resize(bias->dims()); cast_bias.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_bias = NpuOpRunner("Cast", {*bias}, {cast_bias}, {{"dst_type", static_cast(dst_dtype)}}); @@ -138,9 +144,12 @@ class LayerNormNPUKernel : public framework::OpKernel { // mean should be of U type Tensor* tmp_mean = mean; Tensor cast_mean(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (scale->type() == framework::proto::VarType::FP32 || - bias->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32)) { cast_mean.Resize(mean->dims()); cast_mean.mutable_data(ctx.GetPlace()); tmp_mean = &cast_mean; @@ -152,9 +161,12 @@ class LayerNormNPUKernel : public framework::OpKernel { // same for variance Tensor* tmp_variance = variance; Tensor cast_variance(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (scale->type() == framework::proto::VarType::FP32 || - bias->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(bias->dtype()) == + framework::proto::VarType::FP32)) { cast_variance.Resize(variance->dims()); cast_variance.mutable_data(ctx.GetPlace()); tmp_variance = &cast_variance; @@ -171,18 +183,24 @@ class LayerNormNPUKernel : public framework::OpKernel { runner.Run(stream); // cast back from FP16 to FP32 - if (x->type() == framework::proto::VarType::FP16 
&& - mean->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(mean->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(mean->type())); const auto& runner_cast_mean = NpuOpRunner("Cast", {*tmp_mean}, {*mean}, {{"dst_type", static_cast(dst_dtype)}}); runner_cast_mean.Run(stream); } // same for variance - if (x->type() == framework::proto::VarType::FP16 && - variance->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(variance->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(variance->type())); const auto& runner_cast_variance = NpuOpRunner("Cast", {*tmp_variance}, {*variance}, {{"dst_type", static_cast(dst_dtype)}}); @@ -260,11 +278,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast scale from LayerNormParamType to T if needed Tensor cast_scale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - scale->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(scale->dtype()) == + framework::proto::VarType::FP32) { cast_scale.Resize(scale->dims()); cast_scale.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_scale = NpuOpRunner("Cast", {*scale}, {cast_scale}, {{"dst_type", static_cast(dst_dtype)}}); @@ -275,11 +296,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast mean from LayerNormParamType to T if needed Tensor cast_mean(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - mean->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32) { cast_mean.Resize(mean->dims()); cast_mean.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_mean = NpuOpRunner("Cast", {*mean}, {cast_mean}, {{"dst_type", static_cast(dst_dtype)}}); @@ -290,11 +314,14 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // cast variance from LayerNormParamType to T if needed Tensor cast_variance(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - variance->type() == framework::proto::VarType::FP32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32) { cast_variance.Resize(variance->dims()); cast_variance.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(x->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); const auto& runner_cast_variance = NpuOpRunner("Cast", {*variance}, {cast_variance}, {{"dst_type", static_cast(dst_dtype)}}); @@ -318,9 +345,12 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // dscale should be of U 
type Tensor* tmp_dscale = dscale; Tensor cast_dscale(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (mean->type() == framework::proto::VarType::FP32 || - variance->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32)) { cast_dscale.Resize(dscale->dims()); cast_dscale.mutable_data(ctx.GetPlace()); tmp_dscale = &cast_dscale; @@ -332,9 +362,12 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // same for dbias Tensor* tmp_dbias = dbias; Tensor cast_dbias(x->type()); - if (x->type() == framework::proto::VarType::FP16 && - (mean->type() == framework::proto::VarType::FP32 || - variance->type() == framework::proto::VarType::FP32)) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + (framework::TransToProtoVarType(mean->dtype()) == + framework::proto::VarType::FP32 || + framework::TransToProtoVarType(variance->dtype()) == + framework::proto::VarType::FP32)) { cast_dbias.Resize(dbias->dims()); cast_dbias.mutable_data(ctx.GetPlace()); tmp_dbias = &cast_dbias; @@ -349,18 +382,24 @@ class LayerNormGradNPUKernel : public framework::OpKernel { runner.Run(stream); // cast back from FP16 to FP32 - if (x->type() == framework::proto::VarType::FP16 && - dscale->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(dscale->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(dscale->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dscale->type())); const auto& runner_cast_dscale = NpuOpRunner("Cast", {*tmp_dscale}, {*dscale}, {{"dst_type", static_cast(dst_dtype)}}); runner_cast_dscale.Run(stream); } // same for dbias - if (x->type() == framework::proto::VarType::FP16 && - dbias->type() == framework::proto::VarType::FP32) { - auto dst_dtype = ConvertToNpuDtype(dbias->type()); + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(dbias->dtype()) == + framework::proto::VarType::FP32) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(dbias->type())); const auto& runner_cast_dbias = NpuOpRunner("Cast", {*tmp_dbias}, {*dbias}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index 4bf2a7cb372cb..885d8de0d93b2 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -54,10 +54,10 @@ class CUDALinspaceKernel : public framework::OpKernel { Tensor start_t; Tensor stop_t; - auto start_dtype = - framework::OpKernelType(pre_start->type(), context.GetPlace()); - auto stop_dtype = - framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto start_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_start->dtype()), context.GetPlace()); + auto stop_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_stop->dtype()), context.GetPlace()); auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); diff --git 
a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 7e384f4b64bc3..e5f2c9e883cf8 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -36,10 +36,10 @@ class CPULinspaceKernel : public framework::OpKernel { Tensor start_t; Tensor stop_t; - auto start_dtype = - framework::OpKernelType(pre_start->type(), context.GetPlace()); - auto stop_dtype = - framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto start_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_start->dtype()), context.GetPlace()); + auto stop_dtype = framework::OpKernelType( + framework::TransToProtoVarType(pre_stop->dtype()), context.GetPlace()); auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h index 3a97b1cd84819..f344188664b81 100644 --- a/paddle/fluid/operators/load_combine_op.h +++ b/paddle/fluid/operators/load_combine_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" @@ -108,7 +109,7 @@ class LoadCombineOpKernel : public framework::OpKernel { // Get data from fin to tensor paddle::framework::DeserializeFromStream(*buffer, tensor, dev_ctx); - auto in_dtype = tensor->type(); + auto in_dtype = framework::TransToProtoVarType(tensor->dtype()); auto out_dtype = load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; diff --git a/paddle/fluid/operators/load_op.h b/paddle/fluid/operators/load_op.h index 89ad4325a5a53..52ef4d541dff0 100644 --- a/paddle/fluid/operators/load_op.h +++ b/paddle/fluid/operators/load_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" @@ -82,7 +83,7 @@ class LoadOpKernel : public framework::OpKernel { } auto load_as_fp16 = ctx.Attr("load_as_fp16"); - auto in_dtype = tensor->type(); + auto in_dtype = framework::TransToProtoVarType(tensor->dtype()); auto out_dtype = load_as_fp16 ? 
framework::proto::VarType::FP16 : in_dtype; if (in_dtype != out_dtype) { diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index 5f39a9afa94ba..055952f175ca6 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -78,7 +78,8 @@ struct LoDTensorToArrayFunctor : public boost::static_visitor { LoDTensorToArrayFunctorImpl func; func.prev_functor_ = this; func.dev_ctx_ = dev_ctx; - framework::VisitDataType(input_.type(), func); + framework::VisitDataType(framework::TransToProtoVarType(input_.dtype()), + func); } }; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 91b7f91c8e3bc..c0c6809656587 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -87,7 +87,8 @@ class LookupTableKernel : public framework::OpKernel { int64_t row_width = table_t.value().dims()[1]; const auto *table = table_t.value().data(); auto *output = output_t->mutable_data(context.GetPlace()); - auto input_data_type = table_t.value().type(); + auto input_data_type = + framework::TransToProtoVarType(table_t.value().dtype()); for (int64_t i = 0; i < ids_numel; ++i) { if (padding_idx != kNoPadding && ids[i] == padding_idx) { memset(output + i * row_width, 0, row_width * sizeof(T)); diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index d182385cce9d2..e327c783ea694 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -109,7 +109,8 @@ class LookupTableV2CUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids_t = context.Input("Ids"); LookupTableV2CUDAFunctor functor(context, ids_t); - framework::VisitIntDataType(ids_t->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), + functor); } }; @@ -216,7 +217,8 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids_t = context.Input("Ids"); LookupTableV2GradCUDAFunctor functor(context, ids_t); - framework::VisitIntDataType(ids_t->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), + functor); } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index bc433b1e10f3e..88149f53ac7e1 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -100,7 +100,8 @@ struct LookupTableV2CPUFunctor { int64_t row_width = table_t.value().dims()[1]; const auto *table = table_t.value().template data(); auto *output = output_t->template mutable_data(context_.GetPlace()); - auto input_data_type = table_t.value().type(); + auto input_data_type = + framework::TransToProtoVarType(table_t.value().dtype()); for (int64_t i = 0; i < ids_numel; ++i) { if (padding_idx != kNoPadding && ids[i] == padding_idx) { @@ -143,7 +144,8 @@ class LookupTableV2Kernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids = context.Input("Ids"); LookupTableV2CPUFunctor functor(context, ids); - framework::VisitIntDataType(ids->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), + functor); } }; @@ -257,7 +259,8 @@ class 
LookupTableV2GradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &context) const override { const auto *ids = context.Input("Ids"); LookupTableV2GradCPUFunctor functor(context, ids); - framework::VisitIntDataType(ids->type(), functor); + framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), + functor); } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index 0dfb0cf183086..4b3b47d352c91 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -134,7 +134,8 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel { runner_scatter.Run(stream); } else { Tensor casted_ids_t; - if (ids_t->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(ids_t->dtype()) != + framework::proto::VarType::INT32) { casted_ids_t.mutable_data(ids_t->dims(), ctx.GetPlace()); const auto &cast_runner = NpuOpRunner("Cast", {*ids_t}, {casted_ids_t}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index dd0cff5cc5f44..4819bd7251832 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -177,7 +177,7 @@ class LstsqCPUKernel : public framework::OpKernel { // "rwork" only used for complex inputs and "gelsy/gelsd/gelss" drivers Tensor rwork; ValueType* rwork_data = nullptr; - if (framework::IsComplexType(x.type()) && + if (framework::IsComplexType(framework::TransToProtoVarType(x.dtype())) && driver != LapackDriverType::Gels) { int rwork_len = 0; if (driver == LapackDriverType::Gelsy) { diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index b3d79122bcd83..11174540cb0cd 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -38,7 +38,7 @@ void SetValueCompute(const framework::ExecutionContext& ctx, std::vector decrease_axes = {}; std::vector none_axes = {}; - auto dtype = in->type(); + auto dtype = framework::TransToProtoVarType(in->dtype()); auto in_dims = in->dims(); CheckAndUpdateSliceAttrs(in_dims, axes, starts, ends, &steps); @@ -88,7 +88,8 @@ void SetValueCompute(const framework::ExecutionContext& ctx, // set_value is what we want. 
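Editor's note: the lu_op.h hunk just below uses both directions of the new conversion utilities: TransToProtoVarType when a legacy consumer (slice/elementwise dispatch, OpKernelType, NCCL, NPU/MKLDNN helpers) still wants proto::VarType, and TransToPtenDataType when a temporary Tensor has to be constructed from that proto type. A minimal round-trip sketch, assuming only the two helpers shown in the patch; the function name RoundTripExample is illustrative:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

framework::Tensor RoundTripExample(const framework::Tensor& in) {
  // pten dtype -> legacy proto type, e.g. for kernel-type or visitor dispatch.
  auto proto_type = framework::TransToProtoVarType(in.dtype());
  // legacy proto type -> pten dtype, e.g. to build a temporary tensor with
  // the same element type as `in`.
  framework::Tensor tmp(framework::TransToPtenDataType(proto_type));
  tmp.Resize(in.dims());
  return tmp;
}

}  // namespace operators
}  // namespace paddle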
paddle::framework::TensorCopy(*in, place, out); - Tensor slice_tensor(dtype), pad_tensor(dtype); + Tensor slice_tensor(framework::TransToPtenDataType(dtype)), + pad_tensor(framework::TransToPtenDataType(dtype)); slice_tensor.mutable_data(slice_dims, place); pad_tensor.mutable_data(in_dims, place); @@ -146,7 +147,7 @@ void SetValueCompute(const framework::ExecutionContext& ctx, ElementwiseComputeEx, DeviceContext, T>( ctx, &slice_tensor, value_tensor, -1, SubFunctor(), &slice_tensor); } else { - Tensor value_t(dtype); + Tensor value_t(framework::TransToPtenDataType(dtype)); auto value_dims = framework::make_ddim(shape); CheckIsDimsMatch(slice_dims_for_assign, value_dims); diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index a59909644aa25..386d93ce13219 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -72,8 +72,9 @@ void GetClassInterval(const gpuStream_t& stream, const platform::Place& place, PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( num_classes_per_device_ptr, num_classes_per_device_ptr, num_classes_per_device.numel(), - platform::ToNCCLDataType(num_classes_per_device.type()), ncclSum, - comm->comm(), calcu_stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(num_classes_per_device.dtype())), + ncclSum, comm->comm(), calcu_stream)); auto class_interval_ptr = class_interval->mutable_data({nranks + 1}, place); @@ -250,7 +251,7 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); // copy logits to softmax variable since we can't modify logits, // and it also be used when calculate grad @@ -306,8 +307,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( logits_max_buff, logits_max_buff, logits_max.numel(), - platform::ToNCCLDataType(logits_max.type()), ncclMax, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(logits_max.dtype())), + ncclMax, comm->comm(), stream)); } #endif @@ -328,8 +330,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( sum_exp_logits_buff, sum_exp_logits_buff, sum_exp_logits.numel(), - platform::ToNCCLDataType(sum_exp_logits.type()), ncclSum, - comm->comm(), stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(sum_exp_logits.dtype())), + ncclSum, comm->comm(), stream)); } #endif @@ -361,8 +364,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { if (nranks > 1) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( loss_ptr, loss_ptr, loss->numel(), - platform::ToNCCLDataType(loss->type()), ncclSum, comm->comm(), - stream)); + platform::ToNCCLDataType( + framework::TransToProtoVarType(loss->dtype())), + ncclSum, comm->comm(), stream)); } #endif } @@ -409,7 +413,7 @@ class MarginCrossEntropyGradCUDAKernel : public framework::OpKernel { int blocks = NumBlocks(N * D); int threads = kNumCUDAThreads; - const auto& label_type = labels->type(); + const auto& label_type = framework::TransToProtoVarType(labels->dtype()); Tensor class_interval; GetClassInterval(dev_ctx.stream(), context.GetPlace(), diff --git 
a/paddle/fluid/operators/math/beam_search_npu.cc b/paddle/fluid/operators/math/beam_search_npu.cc index 2d5a3dae33b32..96f490c597af0 100644 --- a/paddle/fluid/operators/math/beam_search_npu.cc +++ b/paddle/fluid/operators/math/beam_search_npu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/beam_search.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" +#include "paddle/pten/common/data_type.h" namespace pten { class DenseTensor; @@ -69,11 +70,13 @@ class BeamSearchFunctor { // Step1: Define Tensors and Preprocess the situation that pre_id == end_id // cast ids and pre_ids from int to float32 - Tensor ids_int32(framework::proto::VarType::INT32); - if (ids->type() != framework::proto::VarType::INT32) { + Tensor ids_int32(experimental::DataType::INT32); + if (framework::TransToProtoVarType(ids->dtype()) != + framework::proto::VarType::INT32) { ids_int32.Resize(ids->dims()); ids_int32.mutable_data(ctx.GetPlace()); - auto dst_dtype_ids_int32 = ConvertToNpuDtype(ids_int32.type()); + auto dst_dtype_ids_int32 = + ConvertToNpuDtype(framework::TransToProtoVarType(ids_int32.dtype())); const auto& runner_ids_int32 = NpuOpRunner("Cast", {*ids}, {ids_int32}, {{"dst_type", static_cast(dst_dtype_ids_int32)}}); @@ -82,11 +85,13 @@ class BeamSearchFunctor { ids_int32.ShareDataWith(*ids); } - Tensor pre_ids_int32(framework::proto::VarType::INT32); - if (pre_ids->type() != framework::proto::VarType::INT32) { + Tensor pre_ids_int32(experimental::DataType::INT32); + if (framework::TransToProtoVarType(pre_ids->dtype()) != + framework::proto::VarType::INT32) { pre_ids_int32.Resize(pre_ids->dims()); pre_ids_int32.mutable_data(ctx.GetPlace()); - auto dst_dtype_pre_ids_int32 = ConvertToNpuDtype(pre_ids_int32.type()); + auto dst_dtype_pre_ids_int32 = ConvertToNpuDtype( + framework::TransToProtoVarType(pre_ids_int32.dtype())); const auto& runner_pre_ids_int32 = NpuOpRunner( "Cast", {*pre_ids}, {pre_ids_int32}, {{"dst_type", static_cast(dst_dtype_pre_ids_int32)}}); @@ -95,7 +100,7 @@ class BeamSearchFunctor { pre_ids_int32.ShareDataWith(*pre_ids); } - Tensor expand_pre_ids(pre_ids_int32.type()); + Tensor expand_pre_ids(pre_ids_int32.dtype()); expand_pre_ids.Resize(framework::make_ddim({batch_size, seq_width})); expand_pre_ids.mutable_data(place); const auto& runner_tile_pre_ids = @@ -104,7 +109,7 @@ class BeamSearchFunctor { runner_tile_pre_ids.Run(stream); expand_pre_ids.Resize(ids_int32.dims()); - Tensor expand_pre_scores(pre_scores->type()); + Tensor expand_pre_scores(pre_scores->dtype()); expand_pre_scores.Resize(framework::make_ddim({batch_size, seq_width})); expand_pre_scores.mutable_data(place); const auto& runner_tile_pre_scores = @@ -114,11 +119,11 @@ class BeamSearchFunctor { expand_pre_scores.Resize(scores->dims()); // End_id Tensors - Tensor end_id_tmp_tensor(framework::proto::VarType::INT32); + Tensor end_id_tmp_tensor(experimental::DataType::INT32); end_id_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&end_id_tmp_tensor, end_id); - Tensor end_id_tensors(ids_int32.type()); + Tensor end_id_tensors(ids_int32.dtype()); end_id_tensors.mutable_data(ids_int32.dims(), place); const auto& runner_fill_end_id = NpuOpRunner("FillD", {end_id_tmp_tensor}, {end_id_tensors}, @@ -126,7 +131,7 @@ class BeamSearchFunctor { runner_fill_end_id.Run(stream); // whether expand_pre_ids == end_ids? 
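Editor's note: the beam_search_npu.cc hunks that follow change every staging tensor from the proto enum constructor to the pten DataType constructor (and add the paddle/pten/common/data_type.h include that makes experimental::DataType visible). A small sketch of the new form, assuming the constructor shown in the hunks; MakeInt32Staging is an illustrative name:

#include "paddle/fluid/framework/tensor.h"
#include "paddle/pten/common/data_type.h"

namespace paddle {
namespace operators {

// Before this patch: Tensor staging(framework::proto::VarType::INT32);
// After this patch the constructor takes the pten DataType directly.
framework::Tensor MakeInt32Staging(const framework::DDim& dims) {
  framework::Tensor staging(experimental::DataType::INT32);
  staging.Resize(dims);
  return staging;
}

}  // namespace operators
}  // namespace paddle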
- Tensor equal_end_ids(framework::proto::VarType::BOOL); + Tensor equal_end_ids(experimental::DataType::BOOL); equal_end_ids.mutable_data(ids_int32.dims(), place); const auto& runner_equal_end_ids = NpuOpRunner( "Equal", {expand_pre_ids, end_id_tensors}, {equal_end_ids}, {}); @@ -136,11 +141,11 @@ class BeamSearchFunctor { // [[False, True, True, True, ...], // [False, True, True, True, ...], // ...] - Tensor false_tmp_tensor(framework::proto::VarType::INT32); + Tensor false_tmp_tensor(experimental::DataType::INT32); false_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&false_tmp_tensor, static_cast(false)); - Tensor first_pos_false_tensors(framework::proto::VarType::INT32); + Tensor first_pos_false_tensors(experimental::DataType::INT32); first_pos_false_tensors.Resize(framework::make_ddim({batch_size, 1})); first_pos_false_tensors.mutable_data(place); std::vector fill_dims = {batch_size, 1}; @@ -149,16 +154,16 @@ class BeamSearchFunctor { "FillD", {false_tmp_tensor}, {first_pos_false_tensors}, fill_attr); runner_fill_false_tensors.Run(stream); - Tensor pos_tensors(framework::proto::VarType::INT32); + Tensor pos_tensors(experimental::DataType::INT32); if (seq_width > 1) { pos_tensors.Resize(framework::make_ddim({batch_size, seq_width})); pos_tensors.mutable_data(place); - Tensor true_tmp_tensor(framework::proto::VarType::INT32); + Tensor true_tmp_tensor(experimental::DataType::INT32); true_tmp_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&true_tmp_tensor, static_cast(true)); - Tensor second_pos_true_tensors(framework::proto::VarType::INT32); + Tensor second_pos_true_tensors(experimental::DataType::INT32); second_pos_true_tensors.Resize( framework::make_ddim({batch_size, seq_width - 1})); second_pos_true_tensors.mutable_data(place); @@ -182,10 +187,11 @@ class BeamSearchFunctor { pos_tensors.ShareDataWith(first_pos_false_tensors); } - Tensor cast_pos_tensors_bool(framework::proto::VarType::BOOL); + Tensor cast_pos_tensors_bool(experimental::DataType::BOOL); cast_pos_tensors_bool.Resize(pos_tensors.dims()); cast_pos_tensors_bool.mutable_data(ctx.GetPlace()); - auto dst_dtype = ConvertToNpuDtype(cast_pos_tensors_bool.type()); + auto dst_dtype = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_pos_tensors_bool.type())); const auto& runner_cast_pos_tensors = NpuOpRunner("Cast", {pos_tensors}, {cast_pos_tensors_bool}, {{"dst_type", static_cast(dst_dtype)}}); @@ -193,7 +199,7 @@ class BeamSearchFunctor { // if pre_ids == end_ids, save only one score, and others become -inf // construct pre_ids == end_ids and save only one score - Tensor save_one_end_score(framework::proto::VarType::BOOL); + Tensor save_one_end_score(experimental::DataType::BOOL); save_one_end_score.mutable_data(ids_int32.dims(), place); const auto& runner_logical_and = NpuOpRunner("LogicalAnd", {equal_end_ids, cast_pos_tensors_bool}, @@ -202,13 +208,13 @@ class BeamSearchFunctor { // if save_one_end_score is True, set score to -inf // define -Inf Tensors - Tensor ninf_tmp_tensor(scores->type()); + Tensor ninf_tmp_tensor(scores->dtype()); ninf_tmp_tensor.mutable_data({1}, ctx.GetPlace()); float ninf_value = static_cast(-std::numeric_limits::infinity()); FillNpuTensorWithConstant(&ninf_tmp_tensor, ninf_value); - Tensor ninf_tensors(scores->type()); + Tensor ninf_tensors(scores->dtype()); ninf_tensors.mutable_data(scores->dims(), place); const auto& runner_fill_ninf = NpuOpRunner("FillD", {ninf_tmp_tensor}, {ninf_tensors}, @@ -218,22 +224,22 @@ class BeamSearchFunctor { // 
Step2: calculate topk scores // get scores used in topk op - Tensor tmp_scores(scores->type()); + Tensor tmp_scores(scores->dtype()); tmp_scores.mutable_data(scores->dims(), place); if (!is_accumulated) { // if pre_id == end_id, cal_scores = pre_score, and id = end_id // else, cal_score = pre_score + log(score) // calculate log(scores) - Tensor log_scores(scores->type()); + Tensor log_scores(scores->dtype()); log_scores.mutable_data(scores->dims(), place); - Tensor one(scores->type()); + Tensor one(scores->dtype()); one.mutable_data(scores->dims(), place); const auto& runner_one = NpuOpRunner("OnesLike", {*scores}, {one}, {}); runner_one.Run(stream); - Tensor sub(scores->type()); + Tensor sub(scores->dtype()); sub.mutable_data(scores->dims(), place); const auto& runner_sub = NpuOpRunner("Sub", {*scores, one}, {sub}, {}); runner_sub.Run(stream); @@ -261,7 +267,7 @@ class BeamSearchFunctor { } // if pre_ids == end_ids, save only one score, and others become -inf - Tensor cal_scores(scores->type()); + Tensor cal_scores(scores->dtype()); cal_scores.mutable_data(scores->dims(), place); const auto& runner_select_inf_score = NpuOpRunner("Select", {save_one_end_score, ninf_tensors, tmp_scores}, @@ -273,12 +279,12 @@ class BeamSearchFunctor { cal_scores.Resize( framework::make_ddim({num_seqs, real_beam_size * seq_width})); - Tensor topk_scores(scores->type()); + Tensor topk_scores(scores->dtype()); topk_scores.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); topk_scores.mutable_data(ctx.GetPlace()); - Tensor tmp_indices(framework::proto::VarType::INT32); + Tensor tmp_indices(experimental::DataType::INT32); tmp_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); tmp_indices.mutable_data(ctx.GetPlace()); @@ -296,21 +302,21 @@ class BeamSearchFunctor { runner_topk.Run(stream); // cast tmp_indices from int to float32 for Sort op - Tensor cast_tmp_indices(framework::proto::VarType::FP32); + Tensor cast_tmp_indices(experimental::DataType::FLOAT32); cast_tmp_indices.Resize(tmp_indices.dims()); cast_tmp_indices.mutable_data(ctx.GetPlace()); - auto dst_dtype_tmp_indices_fp32 = - ConvertToNpuDtype(cast_tmp_indices.type()); + auto dst_dtype_tmp_indices_fp32 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_tmp_indices.type())); const auto& runner_cast_tmp_indices = NpuOpRunner( "Cast", {tmp_indices}, {cast_tmp_indices}, {{"dst_type", static_cast(dst_dtype_tmp_indices_fp32)}}); runner_cast_tmp_indices.Run(stream); // sort tmp_indices - Tensor sorted_tmp_indices(framework::proto::VarType::FP32); + Tensor sorted_tmp_indices(experimental::DataType::FLOAT32); sorted_tmp_indices.Resize(tmp_indices.dims()); sorted_tmp_indices.mutable_data(ctx.GetPlace()); - Tensor sorted_score_indices(framework::proto::VarType::INT32); + Tensor sorted_score_indices(experimental::DataType::INT32); sorted_score_indices.Resize(tmp_indices.dims()); sorted_score_indices.mutable_data(ctx.GetPlace()); const auto& runner_sort_tmp_indices = NpuOpRunner( @@ -319,11 +325,11 @@ class BeamSearchFunctor { runner_sort_tmp_indices.Run(stream); // cast sorted_tmp_indices from float32 to int - Tensor cast_sort_tmp_indices(framework::proto::VarType::INT32); + Tensor cast_sort_tmp_indices(experimental::DataType::INT32); cast_sort_tmp_indices.Resize(sorted_tmp_indices.dims()); cast_sort_tmp_indices.mutable_data(ctx.GetPlace()); - auto dst_dtype_tmp_indices_int32 = - ConvertToNpuDtype(cast_sort_tmp_indices.type()); + auto dst_dtype_tmp_indices_int32 = ConvertToNpuDtype( + 
framework::TransToProtoVarType(cast_sort_tmp_indices.type())); const auto& runner_cast_sort_tmp_indices = NpuOpRunner( "Cast", {sorted_tmp_indices}, {cast_sort_tmp_indices}, {{"dst_type", static_cast(dst_dtype_tmp_indices_int32)}}); @@ -332,7 +338,7 @@ class BeamSearchFunctor { // Step 3: infer selected ids from tmp_indices and ids // if pre_ids == end_ids, use pre_ids rather than ids - Tensor cal_ids(ids_int32.type()); + Tensor cal_ids(ids_int32.dtype()); cal_ids.mutable_data(ids_int32.dims(), place); const auto& runner_select_equal_end_id = NpuOpRunner( "Select", {equal_end_ids, expand_pre_ids, ids_int32}, {cal_ids}, {}); @@ -347,7 +353,7 @@ class BeamSearchFunctor { // construct batch_ids like [[0, 0, 0], [1, 1, 1], ..., [bs-1, bs-1, bs-1]] // construct arange(num_seqs*beam_size).reshape((num_seqs, beam_size)) // // beam_size - Tensor batch_ids(framework::proto::VarType::INT32); + Tensor batch_ids(experimental::DataType::INT32); batch_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 1})); batch_ids.mutable_data(place); @@ -362,7 +368,7 @@ class BeamSearchFunctor { // sort topk_scores to get selected_scores // get indices of gather_nd op for calculating selected_scores - Tensor gather_nd_score_indices(framework::proto::VarType::INT32); + Tensor gather_nd_score_indices(experimental::DataType::INT32); gather_nd_score_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 2})); gather_nd_score_indices.mutable_data(place); @@ -388,7 +394,7 @@ class BeamSearchFunctor { // get indices of gather_nd op cast_sort_tmp_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 1})); - Tensor gather_nd_id_indices(framework::proto::VarType::INT32); + Tensor gather_nd_id_indices(experimental::DataType::INT32); gather_nd_id_indices.Resize( framework::make_ddim({num_seqs, static_cast(beam_size), 2})); gather_nd_id_indices.mutable_data(place); @@ -403,7 +409,7 @@ class BeamSearchFunctor { runner_concat_id_indices.Run(stream); // use gather_nd to get selected_ids - Tensor topk_ids(framework::proto::VarType::INT32); + Tensor topk_ids(experimental::DataType::INT32); topk_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); topk_ids.mutable_data(ctx.GetPlace()); @@ -413,7 +419,8 @@ class BeamSearchFunctor { runner_gather_nd_ids.Run(stream); // cast topk_ids from int to int64 to get selected_ids - auto dst_dtype_selected_ids = ConvertToNpuDtype(selected_ids->type()); + auto dst_dtype_selected_ids = + ConvertToNpuDtype(framework::TransToProtoVarType(selected_ids->type())); const auto& runner_cast_selected_ids = NpuOpRunner("Cast", {topk_ids}, {*selected_ids}, {{"dst_type", static_cast(dst_dtype_selected_ids)}}); @@ -423,13 +430,13 @@ class BeamSearchFunctor { // Step 4: set lod of output Tensor // define Tensor with value `seq_width` - Tensor seq_width_tensor(framework::proto::VarType::INT32); + Tensor seq_width_tensor(experimental::DataType::INT32); seq_width_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&seq_width_tensor, static_cast(seq_width)); // beam_ids = tmp_indices // seq_width - Tensor beam_ids(framework::proto::VarType::INT32); + Tensor beam_ids(experimental::DataType::INT32); beam_ids.Resize( framework::make_ddim({num_seqs, static_cast(beam_size)})); beam_ids.mutable_data(ctx.GetPlace()); @@ -447,17 +454,18 @@ class BeamSearchFunctor { framework::make_ddim({num_seqs, static_cast(beam_size)})); // cast batch_ids from int to float32 - Tensor cast_batch_ids(framework::proto::VarType::FP32); + Tensor 
cast_batch_ids(experimental::DataType::FLOAT32); cast_batch_ids.Resize(batch_ids.dims()); cast_batch_ids.mutable_data(ctx.GetPlace()); - auto dst_dtype1 = ConvertToNpuDtype(cast_batch_ids.type()); + auto dst_dtype1 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_batch_ids.type())); const auto& runner_cast_batch_ids = NpuOpRunner("Cast", {batch_ids}, {cast_batch_ids}, {{"dst_type", static_cast(dst_dtype1)}}); runner_cast_batch_ids.Run(stream); // scale batch_ids with beam_size - Tensor scale_batch_ids(framework::proto::VarType::FP32); + Tensor scale_batch_ids(experimental::DataType::FLOAT32); scale_batch_ids.Resize(batch_ids.dims()); scale_batch_ids.mutable_data(place); const auto& runner_power = @@ -468,17 +476,18 @@ class BeamSearchFunctor { runner_power.Run(stream); // cast cast_scale_batch_ids from float32 to int - Tensor cast_scale_batch_ids(framework::proto::VarType::INT32); + Tensor cast_scale_batch_ids(experimental::DataType::INT32); cast_scale_batch_ids.Resize(scale_batch_ids.dims()); cast_scale_batch_ids.mutable_data(ctx.GetPlace()); - auto dst_dtype2 = ConvertToNpuDtype(cast_scale_batch_ids.type()); + auto dst_dtype2 = ConvertToNpuDtype( + framework::TransToProtoVarType(cast_scale_batch_ids.type())); const auto& runner_cast_scale_batch_ids = NpuOpRunner("Cast", {scale_batch_ids}, {cast_scale_batch_ids}, {{"dst_type", static_cast(dst_dtype2)}}); runner_cast_scale_batch_ids.Run(stream); // calculate parent_idx - Tensor tmp_parent_idx(framework::proto::VarType::INT32); + Tensor tmp_parent_idx(experimental::DataType::INT32); tmp_parent_idx.Resize(parent_idx->dims()); tmp_parent_idx.mutable_data(place); const auto& runner_add_beam_id = NpuOpRunner( @@ -486,7 +495,8 @@ class BeamSearchFunctor { runner_add_beam_id.Run(stream); // cast tmp_parent_idx from int to int64 to get parent_idx - auto dst_dtype_parent_idx = ConvertToNpuDtype(parent_idx->type()); + auto dst_dtype_parent_idx = + ConvertToNpuDtype(framework::TransToProtoVarType(parent_idx->type())); const auto& runner_cast_parent_idx = NpuOpRunner("Cast", {tmp_parent_idx}, {*parent_idx}, {{"dst_type", static_cast(dst_dtype_parent_idx)}}); diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index ec78b2dd7b012..0b0584608a300 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/cross_entropy.h" +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace platform { @@ -112,7 +113,8 @@ class CrossEntropyFunctor { } else { HardLabelCrossEntropyCPUFunctorImpl functor_impl( out, prob, labels, ignore_index, axis_dim); - framework::VisitIntDataType(labels->type(), functor_impl); + framework::VisitIntDataType( + framework::TransToProtoVarType(labels->dtype()), functor_impl); } } }; diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index a13adb63fdc54..829ac9fb55964 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/math.h" #include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" @@ -122,7 +123,8 @@ class CrossEntropyFunctor { HardLabelCrossEntropyCUDAFunctorImpl functor( loss_data, prob_data, labels->data(), batch_size, class_num, ignore_index, kMaxBlockDim, ctx.stream()); - framework::VisitDataType(labels->type(), functor); + framework::VisitDataType(framework::TransToProtoVarType(labels->dtype()), + functor); } } }; diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 2cfff0ae88ff6..9ce615c949ffc 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -108,7 +108,8 @@ struct MatrixEighFunctor { ValueType *rwork_data = nullptr; // complex type - if (framework::IsComplexType(input.type())) { + if (framework::IsComplexType( + framework::TransToProtoVarType(input.dtype()))) { lrwork = std::max(1, static_cast(rwork_opt)); rwork_data = rwork_tensor.mutable_data( framework::make_ddim({lrwork}), ctx.GetPlace()); @@ -180,7 +181,8 @@ struct MatrixEighFunctor { // When the input type is float32, and the feature value input dimension is // greater than or equal to [*,32,32] and less than or equal to // [*,512,512], Syevj has better performance. - bool use_syevj = (input.type() == framework::proto::VarType::FP32 && + bool use_syevj = (framework::TransToProtoVarType(input.dtype()) == + framework::proto::VarType::FP32 && values_stride >= 32 && values_stride <= 512); syevjInfo_t syevj_params; if (use_syevj) { diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc index ee6610eae1469..f23c3f14c5c9a 100644 --- a/paddle/fluid/operators/math/matrix_solve.cu.cc +++ b/paddle/fluid/operators/math/matrix_solve.cu.cc @@ -65,7 +65,7 @@ class MatrixSolveFunctor { // copy input A to a temporary tensor tmp_a, // LU factorization, written back to original matrix A, so in the beginning, // it's necessary to create a temporary tensor tmp_a. 
- Tensor tmp_a(a.type()); + Tensor tmp_a(a.dtype()); tmp_a.Resize(a.dims()); tmp_a.mutable_data(context.GetPlace()); framework::TensorCopy(a, context.GetPlace(), &tmp_a); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 6b24f4778442b..9f0f8a96c7677 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -765,8 +765,9 @@ class MatMulOp : public framework::OperatorWithKernel { const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/matmul_op_npu.cc b/paddle/fluid/operators/matmul_op_npu.cc index 1f2626b5f1ea3..f97921b561f96 100644 --- a/paddle/fluid/operators/matmul_op_npu.cc +++ b/paddle/fluid/operators/matmul_op_npu.cc @@ -32,7 +32,7 @@ static void Mul(const framework::ExecutionContext& ctx, const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {*Out}, {}); runner_dx.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {Out_temp}, {}); runner_dx.Run(stream); @@ -53,7 +53,7 @@ static void Dot(const framework::ExecutionContext& ctx, const auto& runner = NpuOpRunner("Dot", {X, Y}, {*Out}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("Dot", {X, Y}, {Out_temp}); out_temp_runner.Run(stream); @@ -77,7 +77,7 @@ static void MatMul2D(const framework::ExecutionContext& ctx, {{"transpose_x1", trans_x}, {"transpose_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("MatMul", {X, Y}, {Out_temp}, @@ -103,7 +103,7 @@ static void MatMulND(const framework::ExecutionContext& ctx, {{"adj_x1", trans_x}, {"adj_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->type()); + Tensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("BatchMatMul", {X, Y}, {Out_temp}, @@ -235,7 +235,7 @@ class MatMulNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + Tensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(framework::make_ddim(x_broadcast_dims)); @@ -250,7 +250,7 @@ class MatMulNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + Tensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(framework::make_ddim(y_broadcast_dims)); @@ -293,7 +293,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { // Case 1: [K] x [K] = [1] if (x_ndim == 1 && y_ndim == 1) { - Tensor dout_temp(dOut->type()); + Tensor dout_temp(dOut->dtype()); 
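Editor's note: the matmul / matmul_v2 hunks around this point illustrate the split the migration settles on: OpKernelType stays on the legacy proto enum, so the complex-promotion path converts with TransToProtoVarType, while NPU temporaries are now built straight from the pten dtype(). A condensed sketch of the kernel-type side, assuming the constructor used in the hunks; MakeComplexPromotedKernelType is an illustrative name:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

framework::OpKernelType MakeComplexPromotedKernelType(
    const framework::Tensor& tensor) {
  // OpKernelType still stores proto::VarType::Type, so convert the pten
  // dtype before promoting the kernel type for complex inputs.
  return framework::OpKernelType(
      framework::TransToProtoVarType(tensor.dtype()), tensor.place(),
      tensor.layout());
}

}  // namespace operators
}  // namespace paddle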
dout_temp.Resize(X->dims()); dout_temp.mutable_data(ctx.GetPlace()); NpuOpRunner runner; @@ -395,7 +395,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + Tensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(framework::make_ddim(x_broadcast_dims)); @@ -410,7 +410,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + Tensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(framework::make_ddim(y_broadcast_dims)); @@ -435,7 +435,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { !transpose_y, alpha); } } else { - Tensor dx_temp(X->type()); + Tensor dx_temp(X->dtype()); dx_temp.Resize(framework::make_ddim(x_broadcast_dims)); if (transpose_x) { MatMulND(ctx, stream, y_temp_brd, dout_temp, &dx_temp, transpose_y, @@ -457,7 +457,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { false, alpha); } } else { - Tensor dy_temp(Y->type()); + Tensor dy_temp(Y->dtype()); dy_temp.Resize(framework::make_ddim(y_broadcast_dims)); if (transpose_y) { MatMulND(ctx, stream, dout_temp, x_temp_brd, &dy_temp, true, diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index f7a5e2a8af409..eaec4b78f4fc0 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -272,8 +272,9 @@ class MatMulV2Op : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -367,8 +368,9 @@ class MatMulV2OpGrad : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } else { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/matrix_power_op.h b/paddle/fluid/operators/matrix_power_op.h index 93755b22bf93a..58a8ef87628fe 100644 --- a/paddle/fluid/operators/matrix_power_op.h +++ b/paddle/fluid/operators/matrix_power_op.h @@ -108,7 +108,7 @@ void MatrixPowerFunction(const Tensor* X, const int n, Tensor* Out, // Calculate Out = newX^{n} for abs(n) > 4 with time complexity as O(logN) int bit = 0; - Tensor z = Tensor(X->type()); + Tensor z = Tensor(X->dtype()); bool out_inited = false; Tensor temp_out = ctx.AllocateTmpTensor(X->dims(), dev_ctx); Tensor temp_z = ctx.AllocateTmpTensor(X->dims(), dev_ctx); diff --git a/paddle/fluid/operators/mean_op_mlu.cc b/paddle/fluid/operators/mean_op_mlu.cc index ca4f3dcc3f465..c25b25aa50ea4 100644 --- 
a/paddle/fluid/operators/mean_op_mlu.cc +++ b/paddle/fluid/operators/mean_op_mlu.cc @@ -45,10 +45,12 @@ class MeanMLUKernel : public framework::OpKernel { reduce_dims.push_back(i); } - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnlReduceDesc reduction_desc( reduce_dims, CNNL_REDUCE_AVG, ToCnnlDataType(), @@ -88,18 +90,21 @@ class MeanMLUGradKernel : public framework::OpKernel { } // means - Tensor mean_var(output_grad->type()); + Tensor mean_var(framework::TransToProtoVarType(output_grad->dtype())); mean_var.mutable_data(input_grad->dims(), context.GetPlace()); - MLUCnnlTensorDesc mean_var_desc(mean_var, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(mean_var.type())); + MLUCnnlTensorDesc mean_var_desc( + mean_var, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(mean_var.dtype()))); auto value = static_cast(1.0 / static_cast(input_grad->numel())); MLUCnnl::Fill(context, value, mean_var_desc.get(), GetBasePtr(&mean_var)); // means mul output_grad - MLUCnnlTensorDesc in_desc(*output_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output_grad->type())); - MLUCnnlTensorDesc out_desc(*input_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input_grad->type())); + MLUCnnlTensorDesc in_desc( + *output_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output_grad->dtype()))); + MLUCnnlTensorDesc out_desc( + *input_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input_grad->dtype()))); MLUCnnlOpTensorDesc op_tensor_desc(CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); diff --git a/paddle/fluid/operators/mean_op_npu.cc b/paddle/fluid/operators/mean_op_npu.cc index d83ed37aa7b5d..d81594658044a 100644 --- a/paddle/fluid/operators/mean_op_npu.cc +++ b/paddle/fluid/operators/mean_op_npu.cc @@ -59,20 +59,20 @@ class MeanGradNPUKernel : public framework::OpKernel { IG->mutable_data(context.GetPlace()); // ones - Tensor ones(grad->type()); + Tensor ones(grad->dtype()); ones.mutable_data(IG->dims(), context.GetPlace()); const auto& runner_ones = NpuOpRunner("OnesLike", {*IG}, {ones}, {}); runner_ones.Run(stream); // means - Tensor mean_tensor(grad->type()); + Tensor mean_tensor(grad->dtype()); mean_tensor.Resize({1}); mean_tensor.mutable_data(context.GetPlace()); FillNpuTensorWithConstant( &mean_tensor, static_cast(1.0 / static_cast(IG->numel()))); // means mul ones - Tensor mean_ma(grad->type()); + Tensor mean_ma(grad->dtype()); mean_ma.Resize(IG->dims()); mean_ma.mutable_data(context.GetPlace()); const auto& runner_mul_1 = diff --git a/paddle/fluid/operators/memcpy_h2d_op.h b/paddle/fluid/operators/memcpy_h2d_op.h index 6d1b8eee04306..0a3921c9f6759 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.h +++ b/paddle/fluid/operators/memcpy_h2d_op.h @@ -46,7 +46,7 @@ class MemcpyH2DFunctor { auto stream = nullptr; #endif out_tensor.mutable_data( - dev_ctx_.GetPlace(), lod_tensor.type(), + dev_ctx_.GetPlace(), lod_tensor.dtype(), pten::Stream(reinterpret_cast(stream))); if (dst_place_type_ == 0 || dst_place_type_ == 1) { diff --git a/paddle/fluid/operators/meshgrid_op.cc b/paddle/fluid/operators/meshgrid_op.cc index 54600e26bb57f..1872547d2ae56 100644 --- 
a/paddle/fluid/operators/meshgrid_op.cc +++ b/paddle/fluid/operators/meshgrid_op.cc @@ -59,7 +59,7 @@ class MeshgridOp : public framework::OperatorWithKernel { bool flag = 0; for (auto* input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/meshgrid_op_npu.cc b/paddle/fluid/operators/meshgrid_op_npu.cc index 6b16d919a8904..dab694b03b427 100644 --- a/paddle/fluid/operators/meshgrid_op_npu.cc +++ b/paddle/fluid/operators/meshgrid_op_npu.cc @@ -54,7 +54,7 @@ class MeshgridNPUKernel : public framework::OpKernel { view_shape[i] = shape[i]; framework::DDim out_dims_reshape = framework::make_ddim(view_shape); - framework::Tensor reshape_ins_tensor(ins[i]->type()); + framework::Tensor reshape_ins_tensor(ins[i]->dtype()); reshape_ins_tensor.ShareDataWith(*ins[i]); reshape_ins_tensor.Resize(out_dims_reshape); diff --git a/paddle/fluid/operators/metrics/accuracy_op_npu.cc b/paddle/fluid/operators/metrics/accuracy_op_npu.cc index 4b81296dd1a93..e279dc5815338 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_npu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_npu.cc @@ -40,11 +40,12 @@ class AccuracyNPUKernel : public framework::OpKernel { } // cast `indices` or `label` if their type is not consistent - Tensor cast_indices(framework::proto::VarType::INT32); - Tensor cast_label(framework::proto::VarType::INT32); - if (indices->type() != label->type()) { + Tensor cast_indices(experimental::DataType::INT32); + Tensor cast_label(experimental::DataType::INT32); + if (indices->dtype() != label->dtype()) { auto dst_dtype = ConvertToNpuDtype(framework::proto::VarType::INT32); - if (indices->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(indices->dtype()) != + framework::proto::VarType::INT32) { cast_indices.Resize(indices->dims()); cast_indices.mutable_data(ctx.GetPlace()); const auto& runner_cast_indices = @@ -54,7 +55,8 @@ class AccuracyNPUKernel : public framework::OpKernel { } else { cast_indices.ShareDataWith(*indices); } - if (label->type() != framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(label->dtype()) != + framework::proto::VarType::INT32) { cast_label.Resize(label->dims()); cast_label.mutable_data(ctx.GetPlace()); const auto& runner_cast_label = @@ -70,7 +72,7 @@ class AccuracyNPUKernel : public framework::OpKernel { } // equal - Tensor tmp_equal(framework::proto::VarType::BOOL); + Tensor tmp_equal(experimental::DataType::BOOL); tmp_equal.Resize(inference->dims()); tmp_equal.mutable_data(ctx.GetPlace()); const auto& runner_equal = @@ -78,18 +80,19 @@ class AccuracyNPUKernel : public framework::OpKernel { runner_equal.Run(stream); // cast equal - Tensor tmp_equal_cast(framework::proto::VarType::FP32); + Tensor tmp_equal_cast(experimental::DataType::FLOAT32); tmp_equal_cast.Resize(inference->dims()); tmp_equal_cast.mutable_data(ctx.GetPlace()); const auto& runner_cast_equal = NpuOpRunner( "Cast", {tmp_equal}, {tmp_equal_cast}, {{"dst_type", - static_cast(ConvertToNpuDtype(tmp_equal_cast.type()))}}); + static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(tmp_equal_cast.dtype())))}}); runner_cast_equal.Run(stream); // [correct] // reduce_max - Tensor tmp_correct_max(framework::proto::VarType::FP32); + Tensor tmp_correct_max(experimental::DataType::FLOAT32); tmp_correct_max.Resize(framework::make_ddim({num_samples})); 
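Editor's note: the accuracy_op_npu.cc hunks above show the two comparison styles the migration leaves behind: two tensors can be compared by their pten dtype() directly, but a comparison against a proto::VarType constant still goes through the explicit conversion. A small sketch of that distinction, grounded in the checks above; NeedsInt32Cast is an illustrative name:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

bool NeedsInt32Cast(const framework::Tensor& indices,
                    const framework::Tensor& label) {
  // dtype-to-dtype comparison needs no conversion ...
  if (indices.dtype() == label.dtype()) return false;
  // ... but comparing against the legacy proto enum still does.
  return framework::TransToProtoVarType(indices.dtype()) !=
         framework::proto::VarType::INT32;
}

}  // namespace operators
}  // namespace paddle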
tmp_correct_max.mutable_data(ctx.GetPlace()); const auto& runner_reduce_max = @@ -98,7 +101,7 @@ class AccuracyNPUKernel : public framework::OpKernel { runner_reduce_max.Run(stream); // reduce_sum - Tensor tmp_correct(framework::proto::VarType::FP32); + Tensor tmp_correct(experimental::DataType::FLOAT32); tmp_correct.Resize(correct->dims()); tmp_correct.mutable_data(ctx.GetPlace()); const auto& runner_reduce_sum = @@ -110,7 +113,8 @@ class AccuracyNPUKernel : public framework::OpKernel { correct->mutable_data(ctx.GetPlace()); const auto& runner_cast_correct = NpuOpRunner( "Cast", {tmp_correct}, {*correct}, - {{"dst_type", static_cast(ConvertToNpuDtype(correct->type()))}}); + {{"dst_type", static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(correct->dtype())))}}); runner_cast_correct.Run(stream); // [total] @@ -118,7 +122,7 @@ class AccuracyNPUKernel : public framework::OpKernel { FillNpuTensorWithConstant(total, static_cast(num_samples)); // use `total` of type `float32` for calculating accuracy - Tensor tmp_total(framework::proto::VarType::FP32); + Tensor tmp_total(experimental::DataType::FLOAT32); tmp_total.Resize(total->dims()); tmp_total.mutable_data(ctx.GetPlace()); FillNpuTensorWithConstant(&tmp_total, diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index cd92aaf0a108c..b4f801f11f99d 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -61,7 +61,8 @@ class ConcatMKLDNNHandler concat_axis = concat_axis + rank; } - memory::data_type dt = framework::ToMKLDNNDataType(inputs[0]->type()); + memory::data_type dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(inputs[0]->dtype())); std::vector srcs_md; srcs_md.reserve(inputs.size()); @@ -185,10 +186,11 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { std::vector offset(dout_vec_dims.size(), 0); - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(), - dout_type, onednn_engine); + dnnl::memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + dout_vec_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index eef38bf99b136..c5215751c8325 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -48,14 +48,16 @@ static dnnl::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, dst_dt = dnnl::memory::data_type::f32; } if (fuse_residual_conn && residual_param) { - auto residual_dt = framework::ToMKLDNNDataType(residual_param->type()); + auto residual_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())); if (dst_dt != residual_dt) dst_dt = residual_dt; } } else { if (!force_fp32_output && is_bfloat16) { dst_dt = dnnl::memory::data_type::bf16; if (fuse_residual_conn && residual_param) { - dst_dt = framework::ToMKLDNNDataType(residual_param->type()); + dst_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())); } } } @@ -202,7 +204,8 @@ class ConvMKLDNNHandlerT 
dnnl::memory::desc src_md, weights_md; if (platform::is_int8()) { src_md = platform::MKLDNNMemDesc( - src_tz, framework::ToMKLDNNDataType(input->type()), + src_tz, framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())), chosen_memory_format); weights_md = platform::MKLDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); @@ -669,7 +672,8 @@ class ConvMKLDNNHandlerT std::shared_ptr AcquireResidualMemory( const framework::Tensor* residual_param) { void* residual_data = - residual_param->type() == framework::DataTypeTrait::DataType() + framework::TransToProtoVarType(residual_param->dtype()) == + framework::DataTypeTrait::DataType() ? platform::to_void_cast(residual_param->data()) : platform::to_void_cast(residual_param->data()); auto residual_mem_p = this->AcquireMemory("@user_residual_data_mem_p"); @@ -679,7 +683,8 @@ class ConvMKLDNNHandlerT } else { auto user_residual_md = platform::MKLDNNMemDesc( framework::vectorize(residual_param->dims()), - framework::ToMKLDNNDataType(residual_param->type()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(residual_param->dtype())), residual_param->format()); return this->AcquireMemoryFromPrimitive(user_residual_md, residual_data, @@ -951,8 +956,8 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { // For convolution with groups convert from blocked to NCHW // otherwise there will be problems in next operators working on this data if (g > 1) { - dnnl::memory::data_type in_type = - framework::ToMKLDNNDataType(filter->type()); + dnnl::memory::data_type in_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(filter->dtype())); // for 3d conv with groups (six dimensional data reorder to goidhw) // for 2d conv with groups (five dimensional data reorder to goihw) // auto weights_tz = framework::vectorize(filter->dims()); @@ -961,8 +966,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { dnnl::memory::format_tag out_format = weights_tz.size() == 6 ? dnnl::memory::format_tag::goidhw : dnnl::memory::format_tag::goihw; - platform::ReorderMKLDNNHandler handler(weights_tz, filter->type(), - in_type, mkldnn_engine); + platform::ReorderMKLDNNHandler handler( + weights_tz, framework::TransToProtoVarType(filter->dtype()), + in_type, mkldnn_engine); auto reorder_dst_memory_p = handler.AcquireDstMemory(filter_grad, out_format, ctx.GetPlace()); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index d4d78bdfb6613..e5ac28d4a5043 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -64,8 +64,8 @@ class DeQuantOpKernel : public framework::OpKernel { auto src_tz = paddle::framework::vectorize(input->dims()); auto dst_tz = paddle::framework::vectorize(output->dims()); - dnnl::memory::data_type src_dt = - paddle::framework::ToMKLDNNDataType(input->type()); + dnnl::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); MKLDNNMemoryFormat src_fmt = input->format(); std::string key = diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index b1be0f0f8fb44..6c9d431d1c9fd 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" @@ -75,7 +76,7 @@ class ExpandMKLDNNKernel : public paddle::framework::OpKernel { private: dnnl::memory::format_tag GetExtendedFormatTag( - std::vector& dims, int new_size, + std::vector& dims, int new_size, // NOLINT dnnl::memory::format_tag format_tag) const { dnnl::memory::desc md(dims, paddle::platform::MKLDNNGetDataType(), format_tag); @@ -112,10 +113,11 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { auto& astream = MKLDNNDeviceContext::tls().get_stream(); if (dout_vec_dims == dx_vec_dims) { - dnnl::memory::data_type dout_type = - paddle::framework::ToMKLDNNDataType(dout->type()); + dnnl::memory::data_type dout_type = paddle::framework::ToMKLDNNDataType( + paddle::framework::TransToProtoVarType(dout->dtype())); paddle::platform::ReorderMKLDNNHandler reorder_handler( - dout_vec_dims, dout->type(), dout_type, onednn_engine); + dout_vec_dims, paddle::framework::TransToProtoVarType(dout->dtype()), + dout_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), paddle::platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index d7353d2def7fc..8efd2b226cad6 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h" #include +#include "paddle/fluid/framework/convert_utils.h" using dnnl::memory; using dnnl::primitive; @@ -56,10 +57,11 @@ static Tensor FoldFirstAndLastDims(const MKLDNNDeviceContext& dev_ctx, auto output_dims = vectorize(output.dims()); - memory::data_type input_type = - paddle::framework::ToMKLDNNDataType(input->type()); + memory::data_type input_type = paddle::framework::ToMKLDNNDataType( + paddle::framework::TransToProtoVarType(input->dtype())); paddle::platform::ReorderMKLDNNHandler reorder_handler( - output_dims, input->type(), input_type, dev_ctx.GetEngine()); + output_dims, paddle::framework::TransToProtoVarType(input->dtype()), + input_type, dev_ctx.GetEngine()); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( memory::format_tag::abc, diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 1b9d9b8f31d35..bc2dbf5696813 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -310,9 +310,10 @@ std::shared_ptr> GetPrimitiveFactory( const Tensor *input_x, const Tensor *input_y, const dnnl::engine &mkldnn_engine) { std::string key = platform::CreateKey( - dev_ctx, input_x->type(), framework::vectorize(input_x->dims()), - input_y->type(), framework::vectorize(input_y->dims()), - ctx.OutputName("Out")); + dev_ctx, framework::TransToProtoVarType(input_x->dtype()), + framework::vectorize(input_x->dims()), + framework::TransToProtoVarType(input_y->dtype()), + framework::vectorize(input_y->dims()), ctx.OutputName("Out")); key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); auto prim_creator = std::static_pointer_cast>( diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index eb0240e3e3d46..f50d183521c36 
100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -94,7 +94,8 @@ class PoolingMKLDNNHandler const auto is_test = ctx.Attr("is_test"); - const auto dt = framework::ToMKLDNNDataType(input->type()); + const auto dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); const auto exclude_padding = ctx.Attr("exclusive"); @@ -183,7 +184,8 @@ class PoolingMKLDNNHandler auto diff_src_tz = paddle::framework::vectorize(in_x_grad->dims()); auto diff_dst_tz = paddle::framework::vectorize(out_grad->dims()); - const auto dt = framework::ToMKLDNNDataType(in_x->type()); + const auto dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(in_x->dtype())); auto src_md = dnnl::memory::desc(src_tz, dt, in_x->format()); auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, MKLDNNMemoryFormat::any); auto diff_dst_md = dnnl::memory::desc( diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 8d14de6f7c969..1b1bd69aec2f4 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -52,7 +52,8 @@ class ReQuantOpKernel : public framework::OpKernel { "Scale of output cannot be 0.0")); if (shift_in != 0.0f) { PADDLE_ENFORCE_EQ( - input->type(), framework::proto::VarType::UINT8, + framework::TransToProtoVarType(input->dtype()), + framework::proto::VarType::UINT8, platform::errors::Unimplemented("Requantize does not support nonzero " "shift for signed input.")); } @@ -81,7 +82,8 @@ class ReQuantOpKernel : public framework::OpKernel { if (reorder_p == nullptr) { auto dst_tz = framework::vectorize(output->dims()); - auto src_dt = framework::ToMKLDNNDataType(input->type()); + auto src_dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); auto dst_dt = with_shift ? 
framework::MKLDNNDataType::u8 : src_dt; auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, input->format()); diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index 06142e95532c5..744fd34062a36 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -74,9 +74,11 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { auto x_vec_dims = framework::vectorize(x_dims); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); @@ -99,8 +101,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferInOutShape(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT switch (op_name) { case ReshapeKernelOpName::reshape: InferShapeReshapeOp(ctx, x_dims, out_dims); @@ -127,8 +129,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeReshapeOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); x_dims = x->dims(); @@ -137,8 +139,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeReshape2Op(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* out = ctx.Output("Out"); auto* xshape = ctx.Output("XShape"); auto xshape_dims = xshape->dims(); @@ -149,9 +151,10 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { // in reshape1/2 ops "ShapeTensor" has highest priority and "Shape" has // second highest priority - void ChangeReshapeOutDimsIfNeeded(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + void ChangeReshapeOutDimsIfNeeded( + const framework::ExecutionContext& ctx, + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto list_new_shape_tensor = ctx.MultiInput("ShapeTensor"); if (list_new_shape_tensor.size() > 0) { auto new_shape = extract_shape(list_new_shape_tensor); @@ -167,8 +170,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeSqueezeOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* x = ctx.Input("X"); x_dims = x->dims(); const auto& axes = ctx.Attr>("axes"); @@ -176,8 +179,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeSqueeze2Op(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto* out = ctx.Output("Out"); auto* xshape = 
ctx.Output("XShape"); auto xshape_dims = xshape->dims(); @@ -186,8 +189,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } void InferShapeFlattenOp(const framework::ExecutionContext& ctx, - framework::DDim& x_dims, - framework::DDim& out_dims) const { + framework::DDim& x_dims, // NOLINT + framework::DDim& out_dims) const { // NOLINT auto x = ctx.Input("X"); x_dims = x->dims(); auto axes = ctx.Attr("axis"); @@ -324,10 +327,11 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { auto dout_vec_dims = framework::vectorize(dout->dims()); - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); - platform::ReorderMKLDNNHandler reorder_handler(dout_vec_dims, dout->type(), - dout_type, onednn_engine); + dnnl::memory::data_type dout_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + dout_vec_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( dout->format(), platform::to_void_cast(dout->data())); @@ -347,7 +351,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { } void InferOutputShapeInGrad(const framework::ExecutionContext& ctx, - framework::DDim& x_dims) const { + framework::DDim& x_dims) const { // NOLINT switch (op_name) { case ReshapeKernelOpName::reshape: InferShapeReshapeSqueezeGradOp(ctx, x_dims); @@ -373,20 +377,22 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { } } - void InferShapeReshapeSqueezeGradOp(const framework::ExecutionContext& ctx, - framework::DDim& dx_dims) const { + void InferShapeReshapeSqueezeGradOp( + const framework::ExecutionContext& ctx, + framework::DDim& dx_dims) const { // NOLINT auto* dx = ctx.Output(framework::GradVarName("X")); dx_dims = dx->dims(); } void InferShapeReshape2Squeeze2Flatten2GradOp( - const framework::ExecutionContext& ctx, framework::DDim& dx_dims) const { + const framework::ExecutionContext& ctx, + framework::DDim& dx_dims) const { // NOLINT auto xshape_dims = ctx.Input("XShape")->dims(); dx_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size()); } void InferShapeFlattenGradOp(const framework::ExecutionContext& ctx, - framework::DDim& dx_dims) const { + framework::DDim& dx_dims) const { // NOLINT dx_dims = ctx.Input("X")->dims(); } }; diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc index e5f70fa10e375..d0059571cdc9b 100644 --- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc @@ -97,10 +97,12 @@ class SliceMKLDNNKernel : public framework::OpKernel { out->Resize(framework::make_ddim(slice_dims)); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); @@ -192,15 +194,16 @@ class SliceGradMKLDNNKernel : public framework::OpKernel { slice_dims[axes[i]] = ends[i] - starts[i]; } - dnnl::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); + dnnl::memory::data_type dout_type = 
framework::ToMKLDNNDataType( + framework::TransToProtoVarType(dout->dtype())); dnnl::memory::desc md(dout_vec_dims, platform::MKLDNNGetDataType(), dout->format()); dnnl::memory::format_tag reorder_format_tag = platform::GetMKLDNNFormat(md.reshape(slice_dims)); - platform::ReorderMKLDNNHandler reorder_handler(slice_dims, dout->type(), - dout_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + slice_dims, framework::TransToProtoVarType(dout->dtype()), dout_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( reorder_format_tag, platform::to_void_cast(dout->data())); diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc index a8e1e6e8a0e4c..f87b75f57d4d0 100644 --- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc @@ -90,14 +90,16 @@ class SplitMKLDNNKernel : public framework::OpKernel { auto x_vec_dims = framework::vectorize(x_dims); - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType(x->type()); + dnnl::memory::data_type x_type = + framework::ToMKLDNNDataType(framework::TransToProtoVarType(x->dtype())); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); std::vector offset(x_vec_dims.size(), 0); - platform::ReorderMKLDNNHandler reorder_handler(x_vec_dims, x->type(), - x_type, onednn_engine); + platform::ReorderMKLDNNHandler reorder_handler( + x_vec_dims, framework::TransToProtoVarType(x->dtype()), x_type, + onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( x->format(), platform::to_void_cast(x->data())); diff --git a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc index ca3aab9afc05b..ad9f8b3aa79ce 100644 --- a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc @@ -46,7 +46,8 @@ class StackMKLDNNHandler // in stack op all inputs must have same dims auto input_dims = framework::vectorize(inputs[0]->dims()); - memory::data_type dt = framework::ToMKLDNNDataType(inputs[0]->type()); + memory::data_type dt = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(inputs[0]->dtype())); std::vector srcs_md; memory::desc dst_md; MKLDNNMemoryFormat dst_fmt; diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 0c442f2fe4d59..5df2a546812ad 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -165,7 +165,9 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { auto& in_out = in_vars[0]->Get(); auto output_tz = framework::vectorize(output->dims()); platform::ReorderMKLDNNHandler reorder_handler( - output_tz, output->type(), framework::ToMKLDNNDataType(in_out.type()), + output_tz, framework::TransToProtoVarType(output->dtype()), + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(in_out.dtype())), dev_ctx.GetEngine()); auto target_mem = reorder_handler.AcquireDstMemory( diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index 50071820e62c8..b1001b4e5684b 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -177,8 +177,9 @@ MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, } MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor) - : MLUCnnlTensorDesc(tensor, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(tensor.type())) {} + : 
MLUCnnlTensorDesc( + tensor, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(tensor.dtype()))) {} MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, cnnlTensorLayout_t layout, diff --git a/paddle/fluid/operators/mul_op_npu.cc b/paddle/fluid/operators/mul_op_npu.cc index 97b1ef0010f98..5a9696ec8ccd1 100644 --- a/paddle/fluid/operators/mul_op_npu.cc +++ b/paddle/fluid/operators/mul_op_npu.cc @@ -68,8 +68,10 @@ class MulNPUKernel : public framework::OpKernel { platform::errors::InvalidArgument( "now only support x_num_col_dims == 2: but got %d", x_num_col_dims)); - if (x->type() == framework::proto::VarType::FP16 && - y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(y->dtype()) == + framework::proto::VarType::FP16) { // NOTE: When the dim of the input and output shapes is inconsistent, // (Boradcast) BatchMatMul NPU OP only support FP16. out->mutable_data(ctx.GetPlace()); @@ -187,8 +189,10 @@ class MulGradNPUKernel : public framework::OpKernel { if (dx) { // tmp_dout * y [2, 3, 5] * [4,5] => [2, 3, 4] - if (dout->type() == framework::proto::VarType::FP16 && - y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(dout->dtype()) == + framework::proto::VarType::FP16 && + framework::TransToProtoVarType(y->dtype()) == + framework::proto::VarType::FP16) { // NOTE: When the dim of the input and output shapes is inconsistent, // (Boradcast) BatchMatMul NPU OP only support FP16. dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 57b6c92d2836c..24b506ebf8a06 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -43,7 +43,8 @@ class OneHotNPUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); float on_value = 1.0f, off_value = 0.0f; - if (in->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(in->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner runner; runner.SetType("OneHot") .AddInput(*in) diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index 34568c3c19e66..acf6baf50b418 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -42,7 +42,8 @@ class OneHotV2NPUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); float on_value = 1.0f, off_value = 0.0f; - if (in->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(in->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner runner; runner.SetType("OneHot") .AddInput(*in) diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index c1846f148fd92..56c5d48b9f497 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -122,9 +122,9 @@ class AdamNPUKernel : public framework::OpKernel { const Tensor* beta2_tensor = nullptr; const Tensor* epsilon_tensor = nullptr; - Tensor beta1_tmp(framework::proto::VarType::FP32); - Tensor beta2_tmp(framework::proto::VarType::FP32); - Tensor epsilon_tmp(framework::proto::VarType::FP32); + Tensor beta1_tmp(experimental::DataType::FLOAT32); + Tensor beta2_tmp(experimental::DataType::FLOAT32); + Tensor epsilon_tmp(experimental::DataType::FLOAT32); if 
(ctx.HasInput("Beta1Tensor")) { beta1_tensor = ctx.Input("Beta1Tensor"); @@ -255,9 +255,9 @@ class AdamWNPUKernel : public AdamNPUKernel { ctx.template device_context() .stream(); - Tensor one(framework::proto::VarType::FP32); - Tensor decay(framework::proto::VarType::FP32); - Tensor tmp(framework::proto::VarType::FP32); + Tensor one(experimental::DataType::FLOAT32); + Tensor decay(experimental::DataType::FLOAT32); + Tensor tmp(experimental::DataType::FLOAT32); tmp.mutable_data({1}, place); one.mutable_data({1}, place); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc index 4ee761cd6fad3..12aa56ebb5c7c 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc @@ -50,15 +50,15 @@ class RMSPROPNPUKernel : public framework::OpKernel { const Tensor *rho_tensor = nullptr; const Tensor *momentum_tensor = nullptr; const Tensor *epsilon_tensor = nullptr; - Tensor rho_tmp(framework::proto::VarType::FP32); + Tensor rho_tmp(experimental::DataType::FLOAT32); rho_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&rho_tmp, rho); rho_tensor = &rho_tmp; - Tensor momentum_tmp(framework::proto::VarType::FP32); + Tensor momentum_tmp(experimental::DataType::FLOAT32); momentum_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&momentum_tmp, momentum); momentum_tensor = &momentum_tmp; - Tensor epsilon_tmp(framework::proto::VarType::FP32); + Tensor epsilon_tmp(experimental::DataType::FLOAT32); epsilon_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&epsilon_tmp, epsilon); epsilon_tensor = &epsilon_tmp; diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index 08c40e02b1702..c3cf7da189d55 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -97,8 +97,9 @@ class SGDOp : public framework::OperatorWithKernel { const std::string &var_name, const framework::Tensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (var_name == "LearningRate") { - return framework::OpKernelType(tensor.type(), tensor.place(), - tensor.layout()); + return framework::OpKernelType( + framework::TransToProtoVarType(tensor.dtype()), tensor.place(), + tensor.layout()); } return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index 23e37ea27b54f..a35fd36a295df 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -17,6 +17,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" @@ -286,7 +287,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { const bool multi_precision = ctx.Attr("multi_precision"); bool use_nesterov = ctx.Attr("use_nesterov"); auto index = ctx.Input("Index"); - const auto& index_type = index->type(); + const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (multi_precision) { if (use_nesterov) { auto update_method = UseNesterov(); @@ -354,7 +355,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { Tensor cpu_axis; const Tensor* axis_tensor = ctx.Input("Axis"); 
framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); - const auto& axis_type = axis_tensor->type(); + const auto& axis_type = + framework::TransToProtoVarType(axis_tensor->dtype()); if (axis_type == framework::proto::VarType::INT32) { axis = static_cast(cpu_axis.data()[0]); } else if (axis_type == framework::proto::VarType::INT64) { diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index bc6545b276ae7..fedadc7581e71 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -88,7 +88,7 @@ class PartialConcatOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index fb60ab54b77b2..242ee2f9cc97a 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -90,7 +90,7 @@ class PartialSumOp : public framework::OperatorWithKernel { bool flag = 0; for (auto *input : inputs) { if (input->IsInitialized() && input->numel() > 0) { - input_data_type = input->type(); + input_data_type = framework::TransToProtoVarType(input->dtype()); flag = 1; break; } diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc index b7049019bc4be..a4d855f00bd61 100644 --- a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc +++ b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc @@ -96,11 +96,12 @@ void InitTensorsOnClient(framework::Scope* scope, int64_t rows_numel, std::vector temp_vec{0}; float* temp_ptr = temp_vec.data(); - memory::Copy( - place, reinterpret_cast(micro_id_ptr), platform::CPUPlace(), - reinterpret_cast(temp_ptr), - micro_id_var->numel() * framework::SizeOfType(micro_id_var->type()), - stream); + memory::Copy(place, reinterpret_cast(micro_id_ptr), + platform::CPUPlace(), reinterpret_cast(temp_ptr), + micro_id_var->numel() * + framework::SizeOfType( + framework::TransToProtoVarType(micro_id_var->dtype())), + stream); auto x_var = scope->Var("x")->GetMutable(); float* x_ptr = @@ -110,7 +111,8 @@ void InitTensorsOnClient(framework::Scope* scope, int64_t rows_numel, float* x_vec_ptr = x_vec.data(); memory::Copy(place, reinterpret_cast(x_ptr), platform::CPUPlace(), reinterpret_cast(x_vec_ptr), - x_var->numel() * framework::SizeOfType(x_var->type()), stream); + x_var->numel() * framework::DataTypeSize(x_var->dtype()), + stream); // auto res_var = scope->Var("res")->GetMutable(); // float* res_ptr = diff --git a/paddle/fluid/operators/put_along_axis_op.cu b/paddle/fluid/operators/put_along_axis_op.cu index 800da8a275c2d..738ced5afb085 100644 --- a/paddle/fluid/operators/put_along_axis_op.cu +++ b/paddle/fluid/operators/put_along_axis_op.cu @@ -38,7 +38,7 @@ class PutAlongAxisCUDAKernel : public framework::OpKernel { auto result = ctx.Output("Result"); const platform::DeviceContext &device_ctx = ctx.device_context(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); framework::TensorCopy(*input, ctx.GetPlace(), result); if (reduce_op == "add") { @@ -90,7 +90,7 @@ class PutAlongAxisGradOpCUDAKernel : public framework::OpKernel { auto result_grad = ctx.Input(framework::GradVarName("Result")); 
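The hunks above repeat one mechanical rewrite: kernels used to read the fluid proto enum straight from the tensor via tensor->type(), and now read pten's experimental::DataType via tensor->dtype() and convert it back with framework::TransToProtoVarType wherever a proto::VarType::Type is still required (the oneDNN data-type mapping, ReorderMKLDNNHandler, and index-type dispatch). A minimal sketch of that call shape, assuming the Paddle source tree at this patch and the convert_utils.h header these hunks add; the helper names ToOneDNNType and IsInt32Index are illustrative only, not part of the patch:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"

namespace paddle {
namespace operators {

// Hypothetical helper: dtype() now yields experimental::DataType, so the
// proto enum is recovered explicitly before calling APIs that still take
// framework::proto::VarType::Type.
inline dnnl::memory::data_type ToOneDNNType(const framework::Tensor& t) {
  return framework::ToMKLDNNDataType(
      framework::TransToProtoVarType(t.dtype()));
}

// Same idea for the index-type checks in scatter/gather style kernels.
inline bool IsInt32Index(const framework::Tensor& index) {
  return framework::TransToProtoVarType(index.dtype()) ==
         framework::proto::VarType::INT32;
}

}  // namespace operators
}  // namespace paddle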
auto axis = ctx.Attr("Axis"); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (input_grad) { framework::TensorCopy(*result_grad, ctx.GetPlace(), input_grad); if (index_type == framework::proto::VarType::INT32) { diff --git a/paddle/fluid/operators/put_along_axis_op.h b/paddle/fluid/operators/put_along_axis_op.h index 0b4481ceacf73..c0be2584f4c3d 100644 --- a/paddle/fluid/operators/put_along_axis_op.h +++ b/paddle/fluid/operators/put_along_axis_op.h @@ -40,7 +40,7 @@ class PutAlongAxisOpKernel : public framework::OpKernel { framework::TensorCopy(*input, ctx.GetPlace(), result); const platform::DeviceContext &device_ctx = ctx.device_context(); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (reduce_op == "add") { if (index_type == framework::proto::VarType::INT32) { cpu_scatter_add_kernel(*result, axis, *index, *value, @@ -90,7 +90,7 @@ class PutAlongAxisGradOpKernel : public framework::OpKernel { auto index = ctx.Input("Index"); auto result_grad = ctx.Input(framework::GradVarName("Result")); auto axis = ctx.Attr("Axis"); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (input_grad) { framework::TensorCopy(*result_grad, ctx.GetPlace(), input_grad); diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index c50a8b731d5a5..ace99db1220b5 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/search_compute.h" @@ -391,7 +392,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { if (iter != iter_end) { exit(1); } - auto weight_type = _blobs_0->type(); + auto weight_type = framework::TransToProtoVarType(_blobs_0->dtype()); if (_is_training == 0 && weight_type != framework::proto::VarType::INT8) { axpy_noadd(top_data, top_data, top->dims()[0] * top->dims()[1], _drop_out_percent); diff --git a/paddle/fluid/operators/randint_op.cu b/paddle/fluid/operators/randint_op.cu index 40e390b0b8724..be6b6c53981cc 100644 --- a/paddle/fluid/operators/randint_op.cu +++ b/paddle/fluid/operators/randint_op.cu @@ -13,6 +13,7 @@ // limitations under the License. #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/uniform_random_op.h" @@ -45,7 +46,7 @@ class GPURandintKernel : public framework::OpKernel { T high = static_cast(context.Attr("high")) - 1; framework::LoDTensor tensor; tensor.Resize(out->dims()); - tensor.mutable_data(cpu, dtype); + tensor.mutable_data(cpu, framework::TransToPtenDataType(dtype)); T* data = tensor.mutable_data(cpu); int64_t size = out->numel(); diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc index 4aad78f1c49cf..6393ff2135d1d 100644 --- a/paddle/fluid/operators/reader/buffered_reader.cc +++ b/paddle/fluid/operators/reader/buffered_reader.cc @@ -13,6 +13,7 @@ // limitations under the License. 
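Two further variants of the same migration appear in the surrounding files: temporary tensors that used to be constructed from the proto enum (Tensor t(framework::proto::VarType::FP32)) now take pten's experimental::DataType::FLOAT32 directly, and byte-size arithmetic moves from framework::SizeOfType(t.type()) to framework::DataTypeSize(t.dtype()). A small sketch under the same source-tree assumption; MakeFloat32Scalar and PayloadBytes are illustrative names only:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace operators {

// Hypothetical helper: allocate a 1-element FP32 tensor the post-patch way,
// passing the pten DataType to the Tensor constructor.
inline framework::Tensor MakeFloat32Scalar(const platform::Place& place) {
  framework::Tensor t(experimental::DataType::FLOAT32);
  t.mutable_data<float>({1}, place);
  return t;
}

// Byte size of a tensor's payload, computed from dtype() instead of the
// removed proto-typed type() accessor.
inline size_t PayloadBytes(const framework::Tensor& t) {
  return t.numel() * framework::DataTypeSize(t.dtype());
}

}  // namespace operators
}  // namespace paddle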
#include "paddle/fluid/operators/reader/buffered_reader.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -126,8 +127,8 @@ void BufferedReader::ReadAsync(size_t i) { cuda[i].set_layout(cpu[i].layout()); cuda_pinned_ptrs[i] = cuda[i].mutable_data(cuda_pinned_place, cpu[i].type()); - auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + auto size = cpu[i].numel() * + paddle::framework::DataTypeSize(cpu[i].dtype()); memory::Copy(cuda_pinned_place, cuda_pinned_ptrs[i], cpu[i].place(), cpu[i].data(), size); @@ -175,7 +176,7 @@ void BufferedReader::ReadAsync(size_t i) { auto cpu_ptr = cpu[i].data(); auto gpu_ptr = gpu_ptrs[i]; auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); if (platform::is_cuda_pinned_place(cpu_place)) { memory::Copy(place_, gpu_ptr, cpu_place, cpu_ptr, size, stream_.get()); @@ -234,7 +235,7 @@ void BufferedReader::ReadAsync(size_t i) { auto cpu_ptr = cpu[i].data(); auto npu_ptr = npu_ptrs[i]; auto size = - cpu[i].numel() * paddle::framework::SizeOfType(cpu[i].type()); + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); if ((platform::is_npu_place(cpu_place))) { memory::Copy(place_, npu_ptr, cpu_place, cpu_ptr, size, stream_.get()); diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 38894495b4ca0..73bc67287c278 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/platform/profiler.h" @@ -141,7 +142,7 @@ class ReadOp : public framework::OperatorBase { "shape = [%s], but received fed shape [%s]", out_arg_names[i], shapes[i].size(), shapes[i], in_dims)); PADDLE_ENFORCE_EQ( - ins[i].type(), var_types[i], + framework::TransToProtoVarType(ins[i].dtype()), var_types[i], platform::errors::InvalidArgument( "The data type of fed Variable %s must be %s, but received %s", out_arg_names[i], var_types[i], ins[i].type())); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index fcfdb8b72ecde..944ae2dc62a55 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -265,7 +265,7 @@ void RecurrentOp::RunImpl(const framework::Scope &scope, framework::LoDTensor *dst_tensor) { // create output tensor at begin dst_tensor->Resize(PrependDims(seq_len, src_tensor.dims())); - dst_tensor->mutable_data(place, src_tensor.type()); + dst_tensor->mutable_data(place, src_tensor.dtype()); auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1); // Explicit copy output since the local RNN scope can be destroyed @@ -438,7 +438,8 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, auto &inside_tensor = cur_scope.FindVar(inside_grad_name)->Get(); framework::AttributeMap attrs; - attrs["dtype"] = inside_tensor.type(); + attrs["dtype"] = + framework::TransToProtoVarType(inside_tensor.dtype()); attrs["shape"] = framework::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; @@ -474,7 +475,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, } // Alloc outside memory outside->Resize(PrependDims(seq_len, 
inside.dims())); - outside->mutable_data(place, inside.type()); + outside->mutable_data(place, inside.dtype()); auto dst = outside->Slice(seq_offset, seq_offset + 1); framework::TensorCopy(inside, place, dev_ctx, &dst); @@ -492,7 +493,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, [&](const framework::LoDTensor &inside, framework::LoDTensor *outside) { outside->Resize(inside.dims()); - outside->mutable_data(place, inside.type()); + outside->mutable_data(place, inside.dtype()); framework::TensorCopy(inside, place, dev_ctx, outside); }, true /*is_backward*/); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h index ad7defd22d056..5dcb81c75407f 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/mkldnn_reuse.h" namespace paddle { @@ -21,11 +23,10 @@ using paddle::framework::LoDTensor; using paddle::framework::Tensor; using platform::to_void_cast; -inline std::vector CalculateReducedDims(const Tensor* input, - const Tensor* output, - std::vector& reduce_dims, - bool reduce_all, - bool keep_dim) { +inline std::vector CalculateReducedDims( + const Tensor* input, const Tensor* output, + std::vector& reduce_dims, // NOLINT + bool reduce_all, bool keep_dim) { if (keep_dim) return framework::vectorize(output->dims()); if (reduce_all) @@ -69,10 +70,11 @@ class ReduceMKLDNNKernel : public framework::OpKernel { // In that case reorder must be executed to maintain compatibility with // PaddlePaddle reduce op if (input_dims == output_dims) { - dnnl::memory::data_type input_type = - framework::ToMKLDNNDataType(input->type()); - platform::ReorderMKLDNNHandler reorder_handler(input_dims, input->type(), - input_type, onednn_engine); + dnnl::memory::data_type input_type = framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())); + platform::ReorderMKLDNNHandler reorder_handler( + input_dims, framework::TransToProtoVarType(input->dtype()), + input_type, onednn_engine); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( input->format(), platform::to_void_cast(input->data())); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc index 7ad048cfe12cd..f99b72faba4ae 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc @@ -36,12 +36,12 @@ class ReduceMaxNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -75,7 +75,8 @@ class ReduceMaxNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - if (x->type() == framework::proto::VarType::INT64) { + if 
(framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { auto op_func = [](const std::vector& inputs, const std::vector& outputs, const NPUAttributeMap& attrs, @@ -94,7 +95,7 @@ class ReduceMaxNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc index ef7e9940f0590..a7ea28314fa79 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc @@ -46,10 +46,12 @@ class ReduceMeanMLUKernel : public framework::OpKernel { } } - MLUCnnlTensorDesc input_desc(*input, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input->type())); - MLUCnnlTensorDesc output_desc(*output, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(output->type())); + MLUCnnlTensorDesc input_desc( + *input, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input->dtype()))); + MLUCnnlTensorDesc output_desc( + *output, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(output->dtype()))); MLUCnnlReduceDesc reduction_desc( reduce_dims, CNNL_REDUCE_AVG, ToCnnlDataType(), @@ -89,7 +91,8 @@ class ReduceMeanGradMLUKernel : public framework::OpKernel { reduce_numel *= input_dims[d]; } - Tensor tmp_output_grad(output_grad->type()); + Tensor tmp_output_grad( + framework::TransToProtoVarType(output_grad->dtype())); auto tmp_output_dims = input_dims; for (auto d : reduce_dims) { tmp_output_dims[d] = 1; @@ -97,10 +100,13 @@ class ReduceMeanGradMLUKernel : public framework::OpKernel { tmp_output_grad.ShareDataWith(*output_grad); tmp_output_grad.Resize(framework::make_ddim(tmp_output_dims)); - MLUCnnlTensorDesc output_grad_desc(tmp_output_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(tmp_output_grad.type())); - MLUCnnlTensorDesc input_grad_desc(*input_grad, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(input_grad->type())); + MLUCnnlTensorDesc output_grad_desc( + tmp_output_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType( + framework::TransToProtoVarType(tmp_output_grad.dtype()))); + MLUCnnlTensorDesc input_grad_desc( + *input_grad, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(input_grad->dtype()))); auto value = static_cast(1.0 / static_cast(reduce_numel)); MLUCnnl::Fill(context, value, input_grad_desc.get(), diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc index d9a62ce4dc97d..96ad2e448f58a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc @@ -36,12 +36,12 @@ class ReduceMinNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->type()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -75,7 +75,7 @@ class ReduceMinNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - if 
(x->type() == framework::proto::VarType::INT64) { + if (x->dtype() == experimental::DataType::INT64) { auto op_func = [](const std::vector& inputs, const std::vector& outputs, const NPUAttributeMap& attrs, @@ -94,7 +94,7 @@ class ReduceMinNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->type()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 4101c8b73e7a0..fa322bba8a0f8 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -25,6 +25,7 @@ limitations under the License. */ #include "paddle/pten/kernels/funcs/math_function.h" // only can include the headers in paddle/pten/api dirs +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/kernels/cpu/reduce.h" @@ -242,8 +243,8 @@ class ReduceKernel : public framework::OpKernel { auto* input = context.Input("X"); if (out_dtype < 0) { - cast_out_dtype = - static_cast(input->type()); + cast_out_dtype = static_cast( + framework::TransToProtoVarType(input->dtype())); } else { cast_out_dtype = static_cast(out_dtype); } @@ -261,7 +262,7 @@ class ReduceKernel : public framework::OpKernel { static_cast::TYPE&>(dev_ctx), *input, reduce_all, tmp_dims, keep_dim, - pten::TransToPtenDataType(cast_out_dtype), output); + framework::TransToPtenDataType(cast_out_dtype), output); } }; template @@ -438,8 +439,9 @@ class ReduceGradKernel : public framework::OpKernel { if (in_dtype >= 0) { Tensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); - auto in_kernel_type = - framework::OpKernelType(pre_input->type(), context.GetPlace()); + auto in_kernel_type = framework::OpKernelType( + framework::TransToProtoVarType(pre_input->dtype()), + context.GetPlace()); auto out_kernel_type = framework::OpKernelType( static_cast(in_dtype), context.GetPlace()); @@ -681,25 +683,20 @@ class ReduceCudaKernel : public framework::OpKernel { const Tensor* input = context.Input("X"); Tensor* output = context.Output("Out"); auto out_dtype = context.Attr("out_dtype"); + auto pt_out_dtype = paddle::framework::TransToPtenDataType( + static_cast(out_dtype)); std::vector dims = context.Attr>("dim"); auto& dev_ctx = context.cuda_device_context(); if (out_dtype >= 0) { - output->mutable_data( - dev_ctx.GetPlace(), - static_cast(out_dtype)); + output->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); } else { - output->mutable_data( - dev_ctx.GetPlace(), - static_cast(input->type())); + output->mutable_data(dev_ctx.GetPlace(), input->dtype()); } std::vector dims_int64{dims.begin(), dims.end()}; - auto pt_out_dtype = pten::TransToPtenDataType( - static_cast(out_dtype)); - pten::Reduce( dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output); } @@ -716,6 +713,8 @@ class ReduceCudaGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Out")); auto* d_x = context.Output(framework::GradVarName("X")); auto out_dtype = context.Attr("in_dtype"); + auto pt_out_dtype = framework::TransToPtenDataType( + static_cast(out_dtype)); // get reduce_dim and reduce_num for reduce_mean_grad int dim_size = in_x->dims().size(); std::vector reduce_dims = GetReduceDim(dims, dim_size, reduce_all); @@ -731,21 +730,14 @@ class 
ReduceCudaGradKernel : public framework::OpKernel { new_d_out.Resize(paddle::framework::make_ddim(update_dims)); auto& dev_ctx = context.cuda_device_context(); if (out_dtype > 0) { - d_x->mutable_data( - dev_ctx.GetPlace(), - static_cast(out_dtype)); + d_x->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); } else { - d_x->mutable_data( - dev_ctx.GetPlace(), - static_cast(d_out->type())); + d_x->mutable_data(dev_ctx.GetPlace(), d_out->dtype()); } auto pt_d_out = paddle::experimental::MakePtenDenseTensor(new_d_out); auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x); - auto pt_out_dtype = pten::TransToPtenDataType( - static_cast(out_dtype)); if (out_dtype <= 0) { - pt_out_dtype = pten::TransToPtenDataType( - static_cast(d_out->type())); + pt_out_dtype = d_out->dtype(); } using MPType = typename kps::details::MPTypeTrait::Type; pten::ReduceGrad>( diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc index 9de50b96d2727..4e24d07ee09e4 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc @@ -36,12 +36,12 @@ class ReduceProdNPUKernel : public framework::OpKernel { cast_out.Resize(out->dims()); cast_out.mutable_data(place); - auto cast_out_dtype = x->type(); + auto cast_out_dtype = framework::TransToProtoVarType(x->dtype()); if (out_dtype != -1) { cast_out_dtype = static_cast(out_dtype); } - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { if (cast_out_dtype == framework::proto::VarType::FP32) { out->mutable_data(place); } else if (cast_out_dtype == framework::proto::VarType::FP16) { @@ -81,7 +81,7 @@ class ReduceProdNPUKernel : public framework::OpKernel { NpuOpRunner("ReduceProdD", {*x}, {cast_out}, attr_input); runner.Run(stream); - if (x->type() != cast_out_dtype) { + if (framework::TransToProtoVarType(x->dtype()) != cast_out_dtype) { auto dst_dtype = ConvertToNpuDtype(cast_out_dtype); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h index 79b3480afbcd7..fc3dba106ac93 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h @@ -79,8 +79,9 @@ class ReduceSumGradKernel : public framework::OpKernel { if (in_dtype >= 0) { Tensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); - auto in_kernel_type = - framework::OpKernelType(pre_input->type(), context.GetPlace()); + auto in_kernel_type = framework::OpKernelType( + framework::TransToProtoVarType(pre_input->dtype()), + context.GetPlace()); auto out_kernel_type = framework::OpKernelType( static_cast(in_dtype), context.GetPlace()); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc index d68de5eaa728d..257c73b5ff49b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc @@ -46,8 +46,10 @@ class ReduceSumNPUKernel : public framework::OpKernel { framework::Tensor cast_x; framework::Tensor cast_out; // NOTE: ReduceSumD only supports fp32 and fp16 - if (x->type() != framework::proto::VarType::FP32 && - x->type() != framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP32 && + 
framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP16) { cast_x.Resize(x->dims()); cast_x.mutable_data(ctx.GetPlace()); auto dst_dtype = ConvertToNpuDtype(framework::proto::VarType::FP32); @@ -80,9 +82,12 @@ class ReduceSumNPUKernel : public framework::OpKernel { runner.Run(stream); } - if (x->type() != framework::proto::VarType::FP32 && - x->type() != framework::proto::VarType::FP16) { - auto dst_dtype = ConvertToNpuDtype(out->type()); + if (framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP32 && + framework::TransToProtoVarType(x->dtype()) != + framework::proto::VarType::FP16) { + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(out->dtype())); const auto& runner_cast = NpuOpRunner("Cast", {cast_out}, {*out}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/repeat_interleave_op.cu b/paddle/fluid/operators/repeat_interleave_op.cu index 8acb4f216ea39..88f7619259b5c 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cu +++ b/paddle/fluid/operators/repeat_interleave_op.cu @@ -99,7 +99,8 @@ class RepeatInterleaveCUDAKernel : public framework::OpKernel { "But received: [%s], required: [%d].", repeats_tensor->dims()[0], in->dims()[dim])); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ( @@ -215,7 +216,8 @@ class RepeatInterleaveGradCUDAKernel : public framework::OpKernel { auto repeats_tensor = context.Input("RepeatsTensor"); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT64 || index_type == framework::proto::VarType::INT32; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/repeat_interleave_op.h b/paddle/fluid/operators/repeat_interleave_op.h index ca861696d719e..4e47226358aa9 100644 --- a/paddle/fluid/operators/repeat_interleave_op.h +++ b/paddle/fluid/operators/repeat_interleave_op.h @@ -78,7 +78,8 @@ class RepeatInterleaveKernel : public framework::OpKernel { "But received: [%s], required: [%d].", repeats_tensor->dims()[0], inputs.dims()[dim])); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -149,7 +150,8 @@ class RepeatInterleaveGradKernel : public framework::OpKernel { if (context.HasInput("RepeatsTensor")) { auto repeats_tensor = context.Input("RepeatsTensor"); - const auto& index_type = repeats_tensor->type(); + const auto& index_type = + framework::TransToProtoVarType(repeats_tensor->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc index 5d6876465c8f6..06c3b3fe76f3a 100644 --- a/paddle/fluid/operators/rnn_memory_helper_op.cc +++ b/paddle/fluid/operators/rnn_memory_helper_op.cc @@ -120,7 +120,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto &in_var_tensor = in_var->Get(); framework::AttributeMap attrs; - attrs["dtype"] = in_var_tensor.type(); + 
attrs["dtype"] = framework::TransToProtoVarType(in_var_tensor.dtype()); attrs["shape"] = framework::vectorize(in_var_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc index 7e19287d42565..efe13251e6fa8 100644 --- a/paddle/fluid/operators/roi_align_op_npu.cc +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -53,7 +53,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { int dtype = static_cast(ConvertToNpuDtype(framework::proto::VarType::FP32)); framework::NPUAttributeMap attr_cast = {{"dst_type", dtype}}; - Tensor ROIsNum_fp(ROIs->type()); + Tensor ROIsNum_fp(ROIs->dtype()); ROIsNum_fp.Resize(framework::make_ddim({ROIs->dims()[0], 1})); ROIsNum_fp.mutable_data(ctx.GetPlace()); @@ -67,7 +67,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { x_list.push_back(*ROIs); auto axis = 1; // output of concate - Tensor ROIs_N5(ROIs->type()); + Tensor ROIs_N5(ROIs->dtype()); ROIs_N5.Resize(framework::make_ddim({ROIs->dims()[0], 5})); ROIs_N5.mutable_data(ctx.GetPlace()); @@ -128,7 +128,8 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { platform::errors::NotFound("Input(RoisNum) of ROIAlignGradOp " "is not found while using NPU.")); PADDLE_ENFORCE_EQ( - rois->type(), framework::proto::VarType::FP32, + framework::TransToProtoVarType(rois->dtype()), + framework::proto::VarType::FP32, platform::errors::InvalidArgument( "ROIAlignGradNPU only support ROIs type equaled to FP32.")); diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index 0aa39c9af5c17..364a490536bd6 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -85,7 +86,7 @@ class SaveCombineOpKernel : public framework::OpKernel { inp_var_names[i])); // Serialize tensors one by one // Check types to see if a fp16 transformation is required - auto in_dtype = tensor.type(); + auto in_dtype = framework::TransToProtoVarType(tensor.dtype()); auto out_dtype = save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; diff --git a/paddle/fluid/operators/save_op.h b/paddle/fluid/operators/save_op.h index 2a61d7ce0c25b..bdabd3ba7ef27 100644 --- a/paddle/fluid/operators/save_op.h +++ b/paddle/fluid/operators/save_op.h @@ -84,7 +84,7 @@ class SaveOpKernel : public framework::OpKernel { "Cannot open %s to save variables.", filename)); auto save_as_fp16 = ctx.Attr("save_as_fp16"); - auto in_dtype = tensor.type(); + auto in_dtype = framework::TransToProtoVarType(tensor.dtype()); auto out_dtype = save_as_fp16 ? 
framework::proto::VarType::FP16 : in_dtype; if (in_dtype != out_dtype) { diff --git a/paddle/fluid/operators/scale_op_mlu.cc b/paddle/fluid/operators/scale_op_mlu.cc index d027ac0d3317f..fa222ecae1c93 100644 --- a/paddle/fluid/operators/scale_op_mlu.cc +++ b/paddle/fluid/operators/scale_op_mlu.cc @@ -32,12 +32,14 @@ class ScaleMLUKernel : public framework::OpKernel { if (ctx.HasInput("ScaleTensor")) { framework::Tensor float_scale_tensor = *ctx.Input("ScaleTensor"); - if (float_scale_tensor.type() != in->type()) { + if (framework::TransToProtoVarType(float_scale_tensor.dtype()) != + framework::TransToProtoVarType(in->dtype())) { scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc float_scale_desc(float_scale_tensor); MLUCnnlTensorDesc final_scale_desc(scale_tensor); - cnnlCastDataType_t cast_type = - GetCastDataType(float_scale_tensor.type(), scale_tensor.type()); + cnnlCastDataType_t cast_type = GetCastDataType( + framework::TransToProtoVarType(float_scale_tensor.dtype()), + framework::TransToProtoVarType(scale_tensor.dtype())); MLUCnnl::Cast(ctx, cast_type, float_scale_desc.get(), GetBasePtr(&float_scale_tensor), final_scale_desc.get(), GetBasePtr(&scale_tensor)); @@ -83,13 +85,15 @@ class ScaleMLUKernel : public framework::OpKernel { ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc new_bias_desc(new_bias_tensor); - MLUCnnlOpTensorDesc mul_op_desc(CNNL_OP_TENSOR_MUL, - ToCnnlDataType(in->type()), - CNNL_NOT_PROPAGATE_NAN); + MLUCnnlOpTensorDesc mul_op_desc( + CNNL_OP_TENSOR_MUL, + ToCnnlDataType(framework::TransToProtoVarType(in->dtype())), + CNNL_NOT_PROPAGATE_NAN); MLUCnnl::OpTensor( ctx, mul_op_desc.get(), scale_desc.get(), GetBasePtr(&scale_tensor), bias_desc.get(), GetBasePtr(&bias_tensor), new_bias_desc.get(), - GetBasePtr(&new_bias_tensor), ToCnnlDataType(in->type())); + GetBasePtr(&new_bias_tensor), + ToCnnlDataType(framework::TransToProtoVarType(in->dtype()))); MLUCnnl::Scale(ctx, axis, input_desc.get(), GetBasePtr(in), scale_desc.get(), GetBasePtr(&scale_tensor), new_bias_desc.get(), GetBasePtr(&new_bias_tensor), diff --git a/paddle/fluid/operators/scale_op_npu.cc b/paddle/fluid/operators/scale_op_npu.cc index 807ad7509e573..8ff565d4e7870 100644 --- a/paddle/fluid/operators/scale_op_npu.cc +++ b/paddle/fluid/operators/scale_op_npu.cc @@ -79,11 +79,13 @@ class ScaleNPUKernel : public framework::OpKernel { adds_runner.Run(dev_ctx.stream()); }; - if (x->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT32) { NpuOpRunner::TypeAdapter({*x}, {*out}, attrs, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); - } else if (x->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter({*x}, {*out}, attrs, dev_ctx, op_func, {framework::proto::VarType::INT32}, {framework::proto::VarType::INT32}); diff --git a/paddle/fluid/operators/scatter_nd_add_op.cc b/paddle/fluid/operators/scatter_nd_add_op.cc index 2d23e81717abb..fa9610b062a1f 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cc +++ b/paddle/fluid/operators/scatter_nd_add_op.cc @@ -98,8 +98,9 @@ class ScatterNdAddOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "Updates"), platform::errors::InvalidArgument( "Ref and Updates must have same type")); - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + return 
framework::OpKernelType( + framework::TransToProtoVarType(ctx.Input("X")->type()), + ctx.device_context()); } }; diff --git a/paddle/fluid/operators/scatter_nd_add_op.cu b/paddle/fluid/operators/scatter_nd_add_op.cu index ec2a0201de63d..6448f8cc4056d 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cu +++ b/paddle/fluid/operators/scatter_nd_add_op.cu @@ -33,7 +33,7 @@ class ScatterNdAddOpCUDAKernel : public framework::OpKernel { auto *Out = ctx.Output("Out"); framework::TensorCopySync(*X, ctx.GetPlace(), Out); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -70,7 +70,7 @@ class ScatterNdAddGradOpCUDAKernel : public framework::OpKernel { if (dUpdates) { dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); if (index_type == framework::proto::VarType::INT32) { GPUGatherNd(ctx, *dOut, *Ids, dUpdates); } else { diff --git a/paddle/fluid/operators/scatter_nd_add_op.h b/paddle/fluid/operators/scatter_nd_add_op.h index 904b8a421d015..2bdf9ec58a850 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.h +++ b/paddle/fluid/operators/scatter_nd_add_op.h @@ -37,7 +37,7 @@ class ScatterNdAddOpKernel : public framework::OpKernel { // In place output: Out = X framework::TensorCopySync(*X, ctx.GetPlace(), Out); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -76,7 +76,7 @@ class ScatterNdAddGradientOpKernel : public framework::OpKernel { if (dUpdates) { dUpdates->mutable_data(ctx.GetPlace()); // Gradient by Gather: dUpdates = dO[Ids] - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); if (index_type == framework::proto::VarType::INT32) { CPUGatherNd(ctx.device_context(), *dOut, *Ids, dUpdates); } else { diff --git a/paddle/fluid/operators/scatter_op.cu b/paddle/fluid/operators/scatter_op.cu index c8dae35822544..549e30803b464 100644 --- a/paddle/fluid/operators/scatter_op.cu +++ b/paddle/fluid/operators/scatter_op.cu @@ -35,7 +35,7 @@ class ScatterOpCUDAKernel : public framework::OpKernel { framework::TensorCopy(*X, ctx.GetPlace(), Out); // use template class to support int32_t and int64_t - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -68,7 +68,7 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel { auto *Ids = ctx.Input("Ids"); auto *dOut = ctx.Input(framework::GradVarName("Out")); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/scatter_op.h b/paddle/fluid/operators/scatter_op.h index 185398bed10ea..69ab6c7135cd5 100644 --- a/paddle/fluid/operators/scatter_op.h +++ b/paddle/fluid/operators/scatter_op.h @@ -39,7 
+39,7 @@ class ScatterOpKernel : public framework::OpKernel { // In place output: Out = X, Out[Ids] = Updates framework::TensorCopy(*X, ctx.GetPlace(), Out); // Apply ScatterUpdate: Out[index] = Updates[:] - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, @@ -79,7 +79,7 @@ class ScatterGradientOpKernel : public framework::OpKernel { auto *Ids = ctx.Input("Ids"); auto *dOut = ctx.Input(framework::GradVarName("Out")); - const auto &index_type = Ids->type(); + const auto &index_type = framework::TransToProtoVarType(Ids->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc index 8d92ea4166513..3d2563c584af2 100644 --- a/paddle/fluid/operators/scatter_op_npu.cc +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -68,7 +68,8 @@ class ScatterNPUKernel : public framework::OpKernel { }; if (overwrite) { - if (x->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter( {*x, *index, *updates}, {*out}, {}, dev_ctx, op_func_update, {framework::proto::VarType::INT32, framework::proto::VarType::INT32, @@ -80,7 +81,8 @@ class ScatterNPUKernel : public framework::OpKernel { runner_update.Run(dev_ctx.stream()); } } else { - if (x->type() == framework::proto::VarType::INT64) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { NpuOpRunner::TypeAdapter( {*x, *index, *updates}, {*out}, {}, dev_ctx, op_func_add, {framework::proto::VarType::INT32, framework::proto::VarType::INT32, diff --git a/paddle/fluid/operators/scatter_op_xpu.cc b/paddle/fluid/operators/scatter_op_xpu.cc index fadf063bc5bd6..9eeebf8420b83 100644 --- a/paddle/fluid/operators/scatter_op_xpu.cc +++ b/paddle/fluid/operators/scatter_op_xpu.cc @@ -36,7 +36,7 @@ class ScatterOpXPUKernel : public framework::OpKernel { // In place output: Out = X, Out[ids] = Updates framework::TensorCopy(*x, ctx.GetPlace(), out); // Apply ScatterUpdate: Out[index] = Updates[:] - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/searchsorted_op.h b/paddle/fluid/operators/searchsorted_op.h index 9eec755293199..f421bdd3cb712 100644 --- a/paddle/fluid/operators/searchsorted_op.h +++ b/paddle/fluid/operators/searchsorted_op.h @@ -180,12 +180,12 @@ class SearchSortedKernel : public framework::OpKernel { int* out_data = out->mutable_data(context.GetPlace()); SearchSortedFunctor functor( context, sorted_sequence, value, right, out_data); - VisitDataType(value->type(), functor); + VisitDataType(framework::TransToProtoVarType(value->dtype()), functor); } else { int64_t* out_data = out->mutable_data(context.GetPlace()); SearchSortedFunctor functor( context, sorted_sequence, value, right, out_data); - VisitDataType(value->type(), functor); + VisitDataType(framework::TransToProtoVarType(value->dtype()), functor); } } }; diff --git 
a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h index 47b18e04e4dcc..fe89ef7212f0c 100644 --- a/paddle/fluid/operators/segment_pool_op.h +++ b/paddle/fluid/operators/segment_pool_op.h @@ -122,7 +122,7 @@ class SegmentPoolKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* segment = context.Input("SegmentIds"); - auto index_type = segment->type(); + auto index_type = framework::TransToProtoVarType(segment->dtype()); if (index_type == framework::proto::VarType::INT32) { SegmentKernelLaunchHelper(context); } else if (index_type == framework::proto::VarType::INT64) { @@ -156,7 +156,7 @@ class SegmentPoolGradKernel : public framework::OpKernel { auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, in_g, static_cast(0)); - auto index_type = segment->type(); + auto index_type = framework::TransToProtoVarType(segment->dtype()); if (index_type == framework::proto::VarType::INT32) { SegmentPoolGradFunctor pool; pool(context.template device_context(), *input, *output, diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc index 675ea175a16a9..ee108eae53e08 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc @@ -59,9 +59,10 @@ class SequenceMaskNPUKernel : public framework::OpKernel { Tensor cast_x; cast_x.mutable_data(x->dims(), ctx.GetPlace()); - const auto& cast1_runner = - NpuOpRunner("Cast", {*x}, {cast_x}, - {{"dst_type", ConvertToNpuDtype(cast_x.type())}}); + const auto& cast1_runner = NpuOpRunner( + "Cast", {*x}, {cast_x}, + {{"dst_type", + ConvertToNpuDtype(framework::TransToProtoVarType(cast_x.dtype()))}}); cast1_runner.Run(dev_ctx.stream()); Tensor tmp; diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 633bc468dc44e..d9dc4ad0a3aba 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -18,6 +18,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -176,7 +177,6 @@ class SetValueKernel : public framework::OpKernel { auto decrease_axes = ctx.Attr>("decrease_axes"); auto none_axes = ctx.Attr>("none_axes"); - auto dtype = in->type(); if (!starts_tensor_list.empty()) { starts = GetDataFromTensorList(starts_tensor_list); } @@ -235,7 +235,7 @@ class SetValueKernel : public framework::OpKernel { // set_value is what we want. 
paddle::framework::TensorCopy(*in, place, out); - Tensor slice_tensor(dtype), pad_tensor(dtype); + Tensor slice_tensor(in->dtype()), pad_tensor(in->dtype()); slice_tensor.mutable_data(slice_dims, place); pad_tensor.mutable_data(in_dims, place); @@ -292,12 +292,13 @@ class SetValueKernel : public framework::OpKernel { ElementwiseComputeEx, DeviceContext, T>( ctx, &slice_tensor, value_tensor, -1, SubFunctor(), &slice_tensor); } else { - Tensor value_t(dtype); + Tensor value_t(in->dtype()); auto value_dims = framework::make_ddim(shape); CheckIsDimsMatch(slice_dims_for_assign, value_dims); value_t.mutable_data(value_dims, place); - auto value_name = GetValueName(dtype); + auto value_name = + GetValueName(framework::TransToProtoVarType(in->dtype())); CopyVecotorToTensor(value_name.c_str(), &value_t, ctx); value_t.Resize(value_dims); ElementwiseComputeEx, DeviceContext, T>( @@ -447,7 +448,7 @@ class SetValueGradKernel : public framework::OpKernel { framework::EigenTensor::From(*grad_input); - framework::Tensor tmp(grad_input->type()); + framework::Tensor tmp(grad_input->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { framework::EigenTensor::From(*grad_value); if (need_reverse) { - framework::Tensor tmp(grad_value->type()); + framework::Tensor tmp(grad_value->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { framework::EigenTensor:: From(*grad_value, fake_grad_value_dims); - framework::Tensor tmp(grad_value->type()); + framework::Tensor tmp(grad_value->dtype()); tmp.mutable_data(out_dims, context.GetPlace()); set_zero(dev_ctx, &tmp, static_cast(0)); auto tmp_t = framework::EigenTensor { tmp_t.slice(framework::EigenDim::From(offset), extent); } if (need_reverse) { - framework::Tensor tmp_value(grad_value->type()); + framework::Tensor tmp_value(grad_value->dtype()); tmp_value.mutable_data(fake_grad_value_dims, context.GetPlace()); auto tmp_value_t = framework::EigenTensor { CheckIsDimsMatch(slice_dims_for_assign, value_dims); value_t.mutable_data(value_dims, ctx.GetPlace()); - auto value_name = GetValueName(in->type()); + auto value_name = + GetValueName(framework::TransToProtoVarType(in->dtype())); CopyVecotorToTensor(value_name.c_str(), &value_t, ctx); value_t.Resize(value_dims); } diff --git a/paddle/fluid/operators/shard_index_op_npu.cc b/paddle/fluid/operators/shard_index_op_npu.cc index a19deb160dbd8..dc2e8ad58f31c 100644 --- a/paddle/fluid/operators/shard_index_op_npu.cc +++ b/paddle/fluid/operators/shard_index_op_npu.cc @@ -67,7 +67,7 @@ class ShardIndexNPUKernel : public framework::OpKernel { tmp.mutable_data(framework::DDim({1}), place); FillNpuTensorWithConstant(&tmp, shard_size); - Tensor condition(framework::proto::VarType::BOOL); + Tensor condition(experimental::DataType::BOOL); condition.mutable_data(in->dims(), place); Tensor tmp2(in->type()); diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 38721e5e3e5bd..a15c2f3d96c16 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -79,7 +79,7 @@ class ShrinkRNNMemoryOp : public ArrayOp { } if (dst_num_rows != 0) { - out_tensor.mutable_data(place, x_tensor.type()); + out_tensor.mutable_data(place, x_tensor.dtype()); auto dev_ctx = platform::DeviceContextPool::Instance().Get(place); framework::TensorCopy(x_tensor.Slice(0, height), place, 
*dev_ctx, &out_tensor); @@ -149,7 +149,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto &x_tensor = x_var->Get(); auto &dx_tensor = *dx_var->GetMutable(); dx_tensor.Resize(x_tensor.dims()); - dx_tensor.mutable_data(x_tensor.place(), x_tensor.type()); + dx_tensor.mutable_data(x_tensor.place(), x_tensor.dtype()); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 37c79b9898e9d..a3d064a8e6dbf 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -130,7 +130,9 @@ class SliceOp : public framework::OperatorWithKernel { "The tensor Input (Input) of Slice op is not initialized.")); // NOTE: cuda pinned tensor need to copy its data to target place if (platform::is_cuda_pinned_place(in_tensor.place())) { - return framework::OpKernelType(in_tensor.type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor.dtype()), + ctx.device_context()); } #ifdef PADDLE_WITH_MKLDNN @@ -152,7 +154,8 @@ class SliceOp : public framework::OperatorWithKernel { } #endif - return framework::OpKernelType(in_tensor.type(), in_tensor.place()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor.dtype()), in_tensor.place()); } return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace()); diff --git a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc index a05ab9056b5cd..73da0e4756b1c 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc @@ -42,12 +42,12 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { const auto& runner1 = NpuOpRunner("Sub", {*in_x, *in_y}, {*out_diff}, {}); runner1.Run(stream); - Tensor no_reduce_loss(in_x->type()); + Tensor no_reduce_loss(in_x->dtype()); no_reduce_loss.Resize(in_x->dims()); no_reduce_loss.mutable_data(context.GetPlace()); // multiply inside weight before get the loss if (has_weight) { - Tensor tmp_diff(out_diff->type()); + Tensor tmp_diff(out_diff->dtype()); tmp_diff.Resize(out_diff->dims()); tmp_diff.mutable_data(context.GetPlace()); const auto& runner2 = @@ -58,11 +58,11 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { context.template device_context(), out_diff); - Tensor tmp_x(in_x->type()); + Tensor tmp_x(in_x->dtype()); tmp_x.Resize(in_x->dims()); tmp_x.mutable_data(context.GetPlace()); - Tensor tmp_y(in_y->type()); + Tensor tmp_y(in_y->dtype()); tmp_y.Resize(in_y->dims()); tmp_y.mutable_data(context.GetPlace()); @@ -85,7 +85,7 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { // multiply outside weight and loss // reduceSum because the output'shape must be [B,1] if (has_weight) { - Tensor tmp_loss(no_reduce_loss.type()); + Tensor tmp_loss(no_reduce_loss.dtype()); tmp_loss.Resize(no_reduce_loss.dims()); tmp_loss.mutable_data(context.GetPlace()); const auto& runner4 = @@ -123,13 +123,13 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { .stream(); // diff == in_x - in_y == diff - 0 - Tensor tmp_zero(diff->type()); + Tensor tmp_zero(diff->dtype()); tmp_zero.Resize(diff->dims()); tmp_zero.mutable_data(context.GetPlace()); const auto& runner_zero = NpuOpRunner("ZerosLike", {*diff}, {tmp_zero}, {}); runner_zero.Run(stream); - Tensor grad(diff->type()); + Tensor grad(diff->dtype()); grad.Resize(diff->dims()); 
grad.mutable_data(context.GetPlace()); // broadcast og(output_grad) to adapt to the npu interface @@ -138,7 +138,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { {{"shape", framework::vectorize(diff->dims())}}); runner_broad.Run(stream); - Tensor gradient(diff->type()); + Tensor gradient(diff->dtype()); gradient.Resize(diff->dims()); gradient.mutable_data(context.GetPlace()); // diff == diff - 0 == in_x - in_y @@ -149,14 +149,14 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // mul weight and gradient if (has_weight) { - Tensor weight(inside_weight->type()); + Tensor weight(inside_weight->dtype()); weight.Resize(inside_weight->dims()); weight.mutable_data(context.GetPlace()); const auto& runner_weight = NpuOpRunner("Mul", {*inside_weight, *outside_weight}, {weight}, {}); runner_weight.Run(stream); - Tensor tmp_grad(gradient.type()); + Tensor tmp_grad(gradient.dtype()); tmp_grad.Resize(gradient.dims()); tmp_grad.mutable_data(context.GetPlace()); const auto& runner_weight_grad = @@ -180,7 +180,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // outy_grad = - gradient if (outy_grad) { outy_grad->mutable_data(context.GetPlace()); - Tensor coeff(framework::proto::VarType::FP32); + Tensor coeff(experimental::DataType::FLOAT32); coeff.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&coeff, -1); const auto& runner_y_grad = diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.h b/paddle/fluid/operators/softmax_with_cross_entropy_op.h index ef8b0bbdf9d32..a7f88dd0ec38e 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.h +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/cross_entropy.h" @@ -54,7 +55,7 @@ static void RunSoftmaxWithCrossEntropyFunctor( const bool soft_label = context.Attr("soft_label"); SoftmaxWithCrossEntropyFunctor functor(context, *labels, soft_label, visitor); - auto dtype = labels->type(); + auto dtype = framework::TransToProtoVarType(labels->dtype()); if (soft_label) { PADDLE_ENFORCE_EQ( dtype, framework::DataTypeTrait::DataType(), diff --git a/paddle/fluid/operators/solve_op.h b/paddle/fluid/operators/solve_op.h index c023d33a444cf..f75dd6243a2ee 100644 --- a/paddle/fluid/operators/solve_op.h +++ b/paddle/fluid/operators/solve_op.h @@ -327,11 +327,11 @@ static void linalg_solve(const framework::ExecutionContext& context, Tensor tmp_y; if (is_vector) { - tmp_y.mutable_data(context.GetPlace(), y->type()); + tmp_y.mutable_data(context.GetPlace(), y->dtype()); to_unsqueeze(context, *y, &tmp_y); } else { tmp_y.Resize(y->dims()); - tmp_y.mutable_data(context.GetPlace(), y->type()); + tmp_y.mutable_data(context.GetPlace(), y->dtype()); framework::TensorCopy( *y, context.GetPlace(), context.template device_context(), &tmp_y); @@ -339,7 +339,7 @@ static void linalg_solve(const framework::ExecutionContext& context, Tensor tmp_x; tmp_x.Resize(x->dims()); - tmp_x.mutable_data(context.GetPlace(), x->type()); + tmp_x.mutable_data(context.GetPlace(), x->dtype()); framework::TensorCopy( *x, context.GetPlace(), context.template device_context(), &tmp_x); @@ -473,11 +473,11 @@ class SolveGradKernel : public framework::OpKernel { Tensor tmp_y; if (is_vector) { - tmp_y.mutable_data(ctx.GetPlace(), y->type()); + tmp_y.mutable_data(ctx.GetPlace(), y->dtype()); to_unsqueeze(ctx, *y, &tmp_y); } else { tmp_y.Resize(y->dims()); - tmp_y.mutable_data(ctx.GetPlace(), y->type()); + tmp_y.mutable_data(ctx.GetPlace(), y->dtype()); framework::TensorCopy( *y, ctx.GetPlace(), ctx.template device_context(), &tmp_y); @@ -485,7 +485,7 @@ class SolveGradKernel : public framework::OpKernel { Tensor tmp_x; tmp_x.Resize(input->dims()); - tmp_x.mutable_data(ctx.GetPlace(), input->type()); + tmp_x.mutable_data(ctx.GetPlace(), input->dtype()); framework::TensorCopy( *input, ctx.GetPlace(), ctx.template device_context(), &tmp_x); @@ -505,7 +505,7 @@ class SolveGradKernel : public framework::OpKernel { tmp_dy.Resize(framework::make_ddim(y_broadcast_dims)); tmp_dy.mutable_data(ctx.GetPlace()); - Tensor tmp_input(input->type()); + Tensor tmp_input(input->dtype()); const auto& new_dims_vec = getNewDimsVec(input->dims()); tmp_input.Resize(framework::make_ddim(new_dims_vec)); tmp_input.mutable_data(ctx.GetPlace()); @@ -530,11 +530,11 @@ class SolveGradKernel : public framework::OpKernel { blas.MatMul(tmp_dy, mat_dim_a1, *out, mat_dim_b1, T(-1), &tmp_dx, T(0)); } else if (is_vector_rhs(*input, *y)) { Tensor tmp_dy_; - tmp_dy_.mutable_data(ctx.GetPlace(), y->type()); + tmp_dy_.mutable_data(ctx.GetPlace(), y->dtype()); to_unsqueeze(ctx, tmp_dy, &tmp_dy_); Tensor tmp_out_; - tmp_out_.mutable_data(ctx.GetPlace(), out->type()); + tmp_out_.mutable_data(ctx.GetPlace(), out->dtype()); to_unsqueeze(ctx, *out, &tmp_out_); auto mat_dim_a1 = @@ -553,7 +553,7 @@ class SolveGradKernel : public framework::OpKernel { if (y->dims() != tmp_dy.dims()) { Tensor dy_help; dy_help.Resize(tmp_dy.dims()); - dy_help.mutable_data(ctx.GetPlace(), tmp_dy.type()); + dy_help.mutable_data(ctx.GetPlace(), tmp_dy.dtype()); 
framework::TensorCopy( tmp_dy, ctx.GetPlace(), ctx.template device_context(), &dy_help); @@ -607,7 +607,7 @@ class SolveGradKernel : public framework::OpKernel { if (input->dims() != tmp_dx.dims()) { Tensor dx_help; dx_help.Resize(tmp_dx.dims()); - dx_help.mutable_data(ctx.GetPlace(), tmp_dx.type()); + dx_help.mutable_data(ctx.GetPlace(), tmp_dx.dtype()); framework::TensorCopy( tmp_dx, ctx.GetPlace(), ctx.template device_context(), &dx_help); diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index b937de1bc8684..5cc7502bb8c44 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -362,7 +362,8 @@ void DotSdd(const platform::CUDADeviceContext& ctx, const Tensor* a, const int* c_columns_data = c_columns->data(); T* c_value_data = c_value->data(); - cudaDataType_t gpu_type = GetGpuType(c_value->type()); + cudaDataType_t gpu_type = + GetGpuType(framework::TransToProtoVarType(c_value->dtype())); cusparseHandle_t handle = nullptr; cusparseDnMatDescr_t mat_a, mat_b; cusparseSpMatDescr_t mat_c; @@ -418,7 +419,8 @@ void DotDsd(const platform::CUDADeviceContext& ctx, const Tensor* a_offset, const T* b_data = b->data(); T* c_data = c->data(); - cudaDataType_t gpu_type = GetGpuType(c->type()); + cudaDataType_t gpu_type = + GetGpuType(framework::TransToProtoVarType(c->dtype())); cusparseHandle_t handle = nullptr; cusparseSpMatDescr_t mat_a; cusparseDnMatDescr_t mat_b, mat_c; diff --git a/paddle/fluid/operators/spectral_op.cc b/paddle/fluid/operators/spectral_op.cc index 64751a21c837d..325bbd99d796b 100644 --- a/paddle/fluid/operators/spectral_op.cc +++ b/paddle/fluid/operators/spectral_op.cc @@ -572,12 +572,14 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out, // make a DFTI_DESCRIPTOR DftiDescriptor desc = - _plan_mkl_fft(x->type(), out->type(), input_stride, output_stride, - signal_sizes, normalization, forward); + _plan_mkl_fft(framework::TransToProtoVarType(x->dtype()), + framework::TransToProtoVarType(out->dtype()), input_stride, + output_stride, signal_sizes, normalization, forward); const FFTTransformType fft_type = GetFFTTransformType(x->type(), out->type()); if (fft_type == FFTTransformType::C2R && forward) { - framework::Tensor collapsed_input_conj(collapsed_input.type()); + framework::Tensor collapsed_input_conj( + framework::TransToProtoVarType(collapsed_input.dtype())); collapsed_input_conj.mutable_data(collapsed_input.dims(), ctx.GetPlace()); // conjugate the input @@ -589,7 +591,8 @@ void exec_fft(const DeviceContext& ctx, const Tensor* x, Tensor* out, MKL_DFTI_CHECK(platform::dynload::DftiComputeBackward( desc.get(), collapsed_input_conj.data(), collapsed_output.data())); } else if (fft_type == FFTTransformType::R2C && !forward) { - framework::Tensor collapsed_output_conj(collapsed_output.type()); + framework::Tensor collapsed_output_conj( + framework::TransToProtoVarType(collapsed_output.dtype())); collapsed_output_conj.mutable_data(collapsed_output.dims(), ctx.GetPlace()); MKL_DFTI_CHECK(platform::dynload::DftiComputeForward( diff --git a/paddle/fluid/operators/spectral_op.cu b/paddle/fluid/operators/spectral_op.cu index d6a775dd55de8..c932834db39b3 100644 --- a/paddle/fluid/operators/spectral_op.cu +++ b/paddle/fluid/operators/spectral_op.cu @@ -73,10 +73,13 @@ FFTConfigKey create_fft_configkey(const framework::Tensor& input, const framework::Tensor& output, int signal_ndim) { // Create the transform plan (either from cache or locally) - const auto 
value_type = framework::IsComplexType(input.type()) - ? framework::ToRealType(input.type()) - : input.type(); - auto fft_type = GetFFTTransformType(input.type(), output.type()); + const auto value_type = + framework::IsComplexType(framework::TransToProtoVarType(input.dtype())) + ? framework::ToRealType(framework::TransToProtoVarType(input.dtype())) + : framework::TransToProtoVarType(input.dtype()); + auto fft_type = + GetFFTTransformType(framework::TransToProtoVarType(input.dtype()), + framework::TransToProtoVarType(output.dtype())); // signal sizes std::vector signal_size(signal_ndim + 1); @@ -137,10 +140,13 @@ FFTConfigKey create_fft_configkey(const framework::Tensor& input, const framework::Tensor& output, int signal_ndim) { // Create the transform plan (either from cache or locally) - const auto value_type = framework::IsComplexType(input.type()) - ? framework::ToRealType(input.type()) - : input.type(); - auto fft_type = GetFFTTransformType(input.type(), output.type()); + const auto value_type = + framework::IsComplexType(framework::TransToProtoVarType(input.dtype())) + ? framework::ToRealType(framework::TransToProtoVarType(input.dtype())) + : framework::TransToProtoVarType(input.dtype()); + auto fft_type = + GetFFTTransformType(framework::TransToProtoVarType(input.dtype()), + framework::TransToProtoVarType(output.dtype())); // signal sizes std::vector signal_size(signal_ndim + 1); diff --git a/paddle/fluid/operators/spectral_op.h b/paddle/fluid/operators/spectral_op.h index e549c4a454b19..bff8bb5707f1c 100644 --- a/paddle/fluid/operators/spectral_op.h +++ b/paddle/fluid/operators/spectral_op.h @@ -15,6 +15,7 @@ #include #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/eigen.h" @@ -393,8 +394,9 @@ class FFTR2CGradKernel : public framework::OpKernel { fft_c2c_func(dev_ctx, &full_dy, &complex_dx, axes, normalization, !forward); } - framework::TransComplexToReal(dx->type(), complex_dx.type(), complex_dx, - dx); + framework::TransComplexToReal( + framework::TransToProtoVarType(dx->dtype()), + framework::TransToProtoVarType(complex_dx.dtype()), complex_dx, dx); } }; diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc index d6d3f4c99aa05..cf913ffe94dd8 100644 --- a/paddle/fluid/operators/split_op_mlu.cc +++ b/paddle/fluid/operators/split_op_mlu.cc @@ -61,13 +61,15 @@ class SplitMLUKernel : public framework::OpKernel { for (size_t i = 0; i < outs.size(); i++) { outs[i]->mutable_data(ctx.GetPlace()); output_descs.emplace_back(MLUCnnlTensorDesc( - *outs[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(outs[i]->type()))); + *outs[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(outs[i]->dtype())))); desc_vector.push_back(output_descs.back().get()); vct_tensor.push_back(GetBasePtr(outs[i])); } // init in tensors - MLUCnnlTensorDesc input_desc(*in, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(in->type())); + MLUCnnlTensorDesc input_desc( + *in, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(in->dtype()))); // MLU should do sth MLUCnnl::Split(ctx, num_tensor, axis, input_desc.get(), GetBasePtr(in), diff --git a/paddle/fluid/operators/strided_slice_op.cc b/paddle/fluid/operators/strided_slice_op.cc index a1b5ca0f6a6eb..9ed102514afcc 100644 --- a/paddle/fluid/operators/strided_slice_op.cc +++ b/paddle/fluid/operators/strided_slice_op.cc @@ -198,7 +198,9 @@ class StridedSliceOp : public
framework::OperatorWithKernel { // NOTE: cuda pinned tensor need to copy its data to target place auto in_tensor = ctx.Input("Input"); if (platform::is_cuda_pinned_place(in_tensor->place())) { - return framework::OpKernelType(in_tensor->type(), ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType(in_tensor->dtype()), + ctx.device_context()); } return framework::OpKernelType( OperatorWithKernel::IndicateVarDataType(ctx, "Input"), diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 00aab6b75006a..f79563a447142 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -22,6 +22,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -134,9 +135,10 @@ class SumOp : public framework::OperatorWithKernel { continue; } if (dtype == -1) { - dtype = tensor->type(); + dtype = framework::TransToProtoVarType(tensor->dtype()); } else { - PADDLE_ENFORCE_EQ(dtype, tensor->type(), + PADDLE_ENFORCE_EQ(dtype, + framework::TransToProtoVarType(tensor->dtype()), platform::errors::InvalidArgument( "The inputs type of sum op must be same")); } @@ -169,8 +171,9 @@ class SumOp : public framework::OperatorWithKernel { for (auto& var : x_vars) { auto& value = var->Get().value(); if (value.IsInitialized()) { - return framework::OpKernelType(value.type(), ctx.device_context(), - layout, library); + return framework::OpKernelType( + framework::TransToProtoVarType(value.dtype()), + ctx.device_context(), layout, library); } } // if input sparse vars are not initialized, use an default kernel type. @@ -181,8 +184,9 @@ class SumOp : public framework::OperatorWithKernel { auto& array = x_var->Get(); for (auto& each : array) { if (each.numel() != 0 && each.IsInitialized()) { - return framework::OpKernelType(each.type(), ctx.device_context(), - layout, library); + return framework::OpKernelType( + framework::TransToProtoVarType(each.dtype()), + ctx.device_context(), layout, library); } } } diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index 59f3a424479bb..b84ac3c79876a 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -43,13 +43,15 @@ class SumMLUKernel : public framework::OpKernel { std::vector desc_vector; for (int i = 0; i < ins_size; i++) { input_descs.emplace_back(MLUCnnlTensorDesc( - *ins[i], CNNL_LAYOUT_ARRAY, ToCnnlDataType(ins[i]->type()))); + *ins[i], CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(ins[i]->dtype())))); desc_vector.push_back(input_descs.back().get()); inputs.push_back(GetBasePtr(ins[i])); } // init out tensors - MLUCnnlTensorDesc output_desc(*out, CNNL_LAYOUT_ARRAY, - ToCnnlDataType(out->type())); + MLUCnnlTensorDesc output_desc( + *out, CNNL_LAYOUT_ARRAY, + ToCnnlDataType(framework::TransToProtoVarType(out->dtype()))); uint32_t ins_size_t = static_cast(ins_size); MLUCnnl::AddN(ctx, ins_size_t, desc_vector.data(), inputs.data(), output_desc.get(), GetBasePtr(out)); diff --git a/paddle/fluid/operators/sync_batch_norm_op.cu.h b/paddle/fluid/operators/sync_batch_norm_op.cu.h index 201de5ac1a428..17c96544988b6 100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cu.h +++ b/paddle/fluid/operators/sync_batch_norm_op.cu.h @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include namespace cub = hipcub; #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/batch_norm_op.h" @@ -189,7 +190,8 @@ void SyncBatchNormFunctor(const framework::ExecutionContext &ctx, #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto *comm = dev_ctx.nccl_comm(); if (comm) { - int dtype = platform::ToNCCLDataType(mean_out->type()); + int dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(mean_out->dtype())); // In-place operation PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( stats, stats, 2 * C + 1, static_cast(dtype), ncclSum, @@ -463,7 +465,8 @@ void SyncBatchNormGradFunctor( #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto *comm = dev_ctx.nccl_comm(); if (comm) { - int dtype = platform::ToNCCLDataType(scale->type()); + int dtype = platform::ToNCCLDataType( + framework::TransToProtoVarType(scale->dtype())); // In-place operation PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclAllReduce( stats, stats, 2 * C + 1, static_cast(dtype), ncclSum, diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc b/paddle/fluid/operators/sync_batch_norm_op_npu.cc index c666d5a11fa31..b5632f4fe4a84 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc @@ -398,7 +398,8 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { float device_counts = 0.0; if (comm) { - HcclDataType dtype = platform::ToHCCLDataType(mean_out->type()); + HcclDataType dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(mean_out->dtype())); Tensor device_count_tensor; { @@ -539,7 +540,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { multiples = {N, H, W, 1}; auto comm = paddle::platform::HCCLCommContext::Instance().Get(0, place); - HcclDataType dtype = platform::ToHCCLDataType(scale->type()); + HcclDataType dtype = platform::ToHCCLDataType( + framework::TransToProtoVarType(scale->dtype())); float device_counts = 0.0; if (comm) { @@ -695,7 +697,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_mul_x_sub_mean_for_scale; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { dy_mul_x_sub_mean_for_scale.Resize(x->dims()); dy_mul_x_sub_mean_for_scale.mutable_data(place); const auto &runner = NpuOpRunner("Mul", {*d_y, x_sub_saved_mean}, @@ -712,7 +715,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_mul_x_sub_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { dy_mul_x_sub_mean.Resize(x->dims()); dy_mul_x_sub_mean.mutable_data(place); const auto &runner = NpuOpRunner("Mul", {*d_y, x_sub_saved_mean}, @@ -751,7 +755,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { if (d_x) { Tensor dy_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; dy_mean.Resize({C}); @@ -813,7 +818,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { Tensor dy_sub_dy_mean; { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + 
framework::proto::VarType::FP16) { dy_sub_dy_mean.Resize(x->dims()); dy_sub_dy_mean.mutable_data(place); const auto &runner = @@ -964,7 +970,8 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // cacl d_bias if (d_bias) { - if (d_y->type() == framework::proto::VarType::FP16) { + if (framework::TransToProtoVarType(d_y->dtype()) == + framework::proto::VarType::FP16) { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; d_bias->mutable_data(place); diff --git a/paddle/fluid/operators/take_along_axis_op.cu b/paddle/fluid/operators/take_along_axis_op.cu index 2d0ebbc20f215..d524d31c461cf 100644 --- a/paddle/fluid/operators/take_along_axis_op.cu +++ b/paddle/fluid/operators/take_along_axis_op.cu @@ -32,7 +32,7 @@ class TakeAlongAxisCUDAKernel : public framework::OpKernel { auto result = ctx.Output("Result"); result->Resize(index->dims()); result->mutable_data(ctx.GetPlace()); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { gpu_gather_kernel(*input, axis, *index, *result, ctx.device_context()); @@ -66,7 +66,7 @@ class TakeAlongAxisGradOpCUDAKernel : public framework::OpKernel { pten::funcs::SetConstant functor; functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { gpu_scatter_add_kernel( diff --git a/paddle/fluid/operators/take_along_axis_op.h b/paddle/fluid/operators/take_along_axis_op.h index e7f804621b3f4..66e2f8898b3a0 100644 --- a/paddle/fluid/operators/take_along_axis_op.h +++ b/paddle/fluid/operators/take_along_axis_op.h @@ -41,7 +41,7 @@ class TakeAlongAxisOpKernel : public framework::OpKernel { result->Resize(index->dims()); result->mutable_data(ctx.GetPlace()); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { cpu_gather_kernel(*input, axis, *index, *result, ctx.device_context()); @@ -76,7 +76,7 @@ class TakeAlongAxisGradOpKernel : public framework::OpKernel { functor(reinterpret_cast(dev_ctx), input_grad, static_cast(0)); - const auto &index_type = index->type(); + const auto &index_type = framework::TransToProtoVarType(index->dtype()); if (index_type == framework::proto::VarType::INT32) { cpu_scatter_add_kernel( *input_grad, axis, *index, *result_grad, diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 3549fc6c45e8d..963dfd3bf7720 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -108,7 +108,8 @@ class TDMChildKernel : public framework::OpKernel { auto *tree_info_var = ctx.InputVar("TreeInfo"); auto &input_tensor = input_var->Get(); - const auto &input_type = input_tensor.type(); + const auto &input_type = + framework::TransToProtoVarType(input_tensor.dtype()); bool input_type_match = input_type == framework::proto::VarType::INT32 || input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, @@ -122,7 +123,8 @@ class TDMChildKernel : public framework::OpKernel { framework::proto::VarType::INT64))); auto &tree_info_tensor = tree_info_var->Get(); - const auto &info_type = tree_info_tensor.type(); + const auto &info_type = + framework::TransToProtoVarType(tree_info_tensor.dtype()); bool info_type_match = 
info_type == framework::proto::VarType::INT32 || info_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index b740f34b0a346..bf752a9c8ad78 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -244,7 +244,8 @@ class TDMSamplerKernel : public framework::OpKernel { auto &travel_lod_tensor = travel_var->Get(); auto &layer_lod_tensor = layer_var->Get(); - const auto &input_type = input_tensor.type(); + const auto &input_type = + framework::TransToProtoVarType(input_tensor.dtype()); bool input_type_match = input_type == framework::proto::VarType::INT32 || input_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(input_type_match, true, @@ -257,7 +258,8 @@ class TDMSamplerKernel : public framework::OpKernel { paddle::framework::DataTypeToString( framework::proto::VarType::INT64))); - const auto &travel_type = travel_lod_tensor.type(); + const auto &travel_type = + framework::TransToProtoVarType(travel_lod_tensor.dtype()); bool travel_type_match = travel_type == framework::proto::VarType::INT32 || travel_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ( @@ -271,7 +273,8 @@ class TDMSamplerKernel : public framework::OpKernel { paddle::framework::DataTypeToString( framework::proto::VarType::INT64))); - const auto &layer_type = layer_lod_tensor.type(); + const auto &layer_type = + framework::TransToProtoVarType(layer_lod_tensor.dtype()); bool layer_type_match = layer_type == framework::proto::VarType::INT32 || layer_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(layer_type_match, true, diff --git a/paddle/fluid/operators/tensor_formatter.cc b/paddle/fluid/operators/tensor_formatter.cc index b65ebee9b2662..ef46ee25156e5 100644 --- a/paddle/fluid/operators/tensor_formatter.cc +++ b/paddle/fluid/operators/tensor_formatter.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/tensor_formatter.h" #include +#include "paddle/fluid/framework/convert_utils.h" namespace paddle { namespace operators { @@ -90,7 +91,8 @@ std::string TensorFormatter::Format(const framework::LoDTensor& print_tensor, << std::endl; } - std::type_index dtype = framework::ToTypeIndex(print_tensor.type()); + std::type_index dtype = framework::ToTypeIndex( + framework::TransToProtoVarType(print_tensor.dtype())); if (print_tensor_type_) { log_stream << " - dtype: " << platform::demangle(dtype.name()) << std::endl; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index f67295a5dbf7c..41d5a010fd55e 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -513,7 +513,7 @@ class TensorRTEngineOp : public framework::OperatorBase { #endif } runtime_batch = t_shape[0]; - auto type = t.type(); + auto type = framework::TransToProtoVarType(t.dtype()); if (type == framework::proto::VarType::FP32) { buffers[bind_index] = static_cast(t.data()); } else if (type == framework::proto::VarType::INT64) { diff --git a/paddle/fluid/operators/top_k_op_npu.cc b/paddle/fluid/operators/top_k_op_npu.cc index bd736f9fc913d..ba7c39e955eee 100644 --- a/paddle/fluid/operators/top_k_op_npu.cc +++ b/paddle/fluid/operators/top_k_op_npu.cc @@ -64,7 +64,7 @@ class TopkNPUKernel : public framework::OpKernel { {"dim", -1}, {"largest", true}}; - Tensor tmp_indices(framework::proto::VarType::INT32); + Tensor tmp_indices(experimental::DataType::INT32); 
tmp_indices.Resize(indices->dims()); tmp_indices.mutable_data(ctx.GetPlace()); @@ -77,7 +77,8 @@ class TopkNPUKernel : public framework::OpKernel { runner.Run(stream); // cast indices from INT32 to INT64 - auto dst_dtype = ConvertToNpuDtype(indices->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(indices->dtype())); const auto& runner_cast_indices = NpuOpRunner("Cast", {tmp_indices}, {*indices}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/top_k_v2_op_npu.cc b/paddle/fluid/operators/top_k_v2_op_npu.cc old mode 100755 new mode 100644 index 843f7620cac44..e11070638834c --- a/paddle/fluid/operators/top_k_v2_op_npu.cc +++ b/paddle/fluid/operators/top_k_v2_op_npu.cc @@ -57,7 +57,7 @@ class TopkV2NPUKernel : public framework::OpKernel { out->mutable_data(context.GetPlace()); indices->mutable_data(context.GetPlace()); - framework::Tensor indices_int32(framework::proto::VarType::INT32); + framework::Tensor indices_int32(experimental::DataType::INT32); indices_int32.Resize(output_dims); indices_int32.mutable_data(context.GetPlace()); @@ -77,7 +77,8 @@ class TopkV2NPUKernel : public framework::OpKernel { .Run(npu_stream); // Cast 'indices_int32' to 'indices', from INT32 to INT64 - auto dst_dtype = ConvertToNpuDtype(indices->type()); + auto dst_dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(indices->dtype())); const auto& npu_op_runner_cast = NpuOpRunner("Cast", {indices_int32}, {*indices}, {{"dst_type", static_cast(dst_dtype)}}); diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 47c9d1dba91b9..64dd24b4d638a 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -116,7 +116,8 @@ class TransferLayoutFunctor { out->mutable_data(in.place(), in.type()); framework::VisitDataType( - in.type(), framework::CastDataLayout(&dev_ctx, axis, in, out)); + framework::TransToProtoVarType(in.dtype()), + framework::CastDataLayout(&dev_ctx, axis, in, out)); } const framework::Variable *in_; diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 7320c1314c6f3..c15c1ffe93d60 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -250,7 +250,8 @@ class Transpose2Op : public TransposeOp { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; using framework::proto::VarType; - auto input_data_type = ctx.Input("X")->type(); + auto input_data_type = + framework::TransToProtoVarType(ctx.Input("X")->dtype()); customized_type_value = (input_data_type == VarType::INT8 || input_data_type == VarType::UINT8) ?
kTransposeMKLDNNINT8 diff --git a/paddle/fluid/operators/tril_triu_op_npu.cc b/paddle/fluid/operators/tril_triu_op_npu.cc index 02af711567f84..ad1c1814c05cd 100644 --- a/paddle/fluid/operators/tril_triu_op_npu.cc +++ b/paddle/fluid/operators/tril_triu_op_npu.cc @@ -52,7 +52,8 @@ class TrilTriuNPUKernel : public framework::OpKernel { runner.Run(dev_ctx.stream()); }; - if (x->type() == framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::BOOL) { if (lower) { NpuOpRunner::TypeAdapter({*x}, {*out}, attr_input, dev_ctx, op_func_tril, diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index f88fefd1c6a78..261d9cee2d5cd 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { @@ -28,29 +29,29 @@ class TruncatedGaussianRandomNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { // TODO(zhiqiu): support dynamic shape and call ParameterizedTruncatedNormal std::vector shape = ctx.Attr>("shape"); - Tensor shape_tensor(framework::proto::VarType::INT32); + Tensor shape_tensor(experimental::DataType::INT32); shape_tensor.mutable_data({static_cast(shape.size())}, ctx.GetPlace()); paddle::framework::TensorFromVector(shape, ctx.device_context(), &shape_tensor); float mean = ctx.Attr("mean"); - Tensor mean_tensor(framework::proto::VarType::FP32); + Tensor mean_tensor(experimental::DataType::FLOAT32); mean_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&mean_tensor, mean); float std = ctx.Attr("std"); - Tensor std_tensor(framework::proto::VarType::FP32); + Tensor std_tensor(experimental::DataType::FLOAT32); std_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&std_tensor, std); int32_t seed_var = ctx.Attr("seed"); - Tensor min_tensor(framework::proto::VarType::FP32); + Tensor min_tensor(experimental::DataType::FLOAT32); min_tensor.mutable_data({1}, ctx.GetPlace()); float min_value = mean - std * 2.0; FillNpuTensorWithConstant(&min_tensor, min_value); - Tensor max_tensor(framework::proto::VarType::FP32); + Tensor max_tensor(experimental::DataType::FLOAT32); max_tensor.mutable_data({1}, ctx.GetPlace()); float max_value = mean + std * 2.0; FillNpuTensorWithConstant(&max_tensor, max_value); @@ -80,7 +81,7 @@ class NPUTruncatedGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::uniform_real_distribution dist(std::numeric_limits::min(), diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index f4ae8d8269056..adafbd1fd484e 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -25,7 +25,8 @@ using Tensor = framework::Tensor; inline std::vector GetNewDataFromShapeTensor( const Tensor* new_data_tensor) { - if (new_data_tensor->type() == framework::proto::VarType::INT64) { + if 
(framework::TransToProtoVarType(new_data_tensor->dtype()) == + framework::proto::VarType::INT64) { auto* new_data = new_data_tensor->data(); framework::Tensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { @@ -36,7 +37,8 @@ inline std::vector GetNewDataFromShapeTensor( std::vector vec_new_data(new_data, new_data + new_data_tensor->numel()); return vec_new_data; - } else if (new_data_tensor->type() == framework::proto::VarType::INT32) { + } else if (framework::TransToProtoVarType(new_data_tensor->dtype()) == + framework::proto::VarType::INT32) { auto* new_data = new_data_tensor->data(); std::vector vec_new_data; framework::Tensor cpu_starts_tensor; @@ -53,7 +55,7 @@ inline std::vector GetNewDataFromShapeTensor( PADDLE_THROW(platform::errors::InvalidArgument( "Expected dtype of ShapeTensor must be int32, int64. But got " "unsupport dtype: %s.", - paddle::framework::DataTypeToString(new_data_tensor->type()))); + new_data_tensor->dtype())); } } @@ -70,7 +72,8 @@ inline std::vector GetNewDataFromShapeTensorList( "But received tensor's dim=%s.", tensor->dims())); - if (tensor->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT32) { if (platform::is_gpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -78,7 +81,8 @@ inline std::vector GetNewDataFromShapeTensorList( } else { vec_new_shape.push_back(static_cast(*tensor->data())); } - } else if (tensor->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT64) { if (platform::is_gpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -91,7 +95,8 @@ inline std::vector GetNewDataFromShapeTensorList( "Expected dtype of ShapeTensorList of %d-th must be int32, int64. 
" "But got " "unsupport dtype: %s.", - i, paddle::framework::DataTypeToString(tensor->type()))); + i, paddle::framework::DataTypeToString( + framework::TransToProtoVarType(tensor->dtype())))); } } diff --git a/paddle/fluid/operators/uniform_random_op_npu.cc b/paddle/fluid/operators/uniform_random_op_npu.cc index 6812a2b0b7085..269cf7cc11eb6 100644 --- a/paddle/fluid/operators/uniform_random_op_npu.cc +++ b/paddle/fluid/operators/uniform_random_op_npu.cc @@ -60,7 +60,7 @@ class NPUUniformRandomKernel : public framework::OpKernel { tensor->mutable_data(ctx.GetPlace()); int64_t size = tensor->numel(); - Tensor cpu_tensor(tensor->type()); + Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T *data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index c3d291d1201c6..8c3e33d44dcc5 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -77,7 +77,7 @@ struct UniqueOpFunctor { // init count_data to 0 memset(count_data, 0, uniq.size() * sizeof(IndexT)); - const auto& index_type = index_->type(); + const auto& index_type = framework::TransToProtoVarType(index_->dtype()); bool index_type_match = index_type == framework::proto::VarType::INT32 || index_type == framework::proto::VarType::INT64; PADDLE_ENFORCE_EQ(index_type_match, true, diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index e2cbf73aa1316..82b1aab7bfb7f 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -132,8 +132,10 @@ class UnsqueezeOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - return framework::OpKernelType(ctx.Input("X")->type(), - ctx.device_context()); + return framework::OpKernelType( + framework::TransToProtoVarType( + ctx.Input("X")->type()), + ctx.device_context()); } framework::OpKernelType GetKernelTypeForVar( diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index a413c4a331b65..78506c12bc6d7 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -23,7 +23,8 @@ namespace operators { template inline std::vector GetDataFromTensor(const framework::Tensor* x) { std::vector vec_new_data; - if (x->type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT32) { auto* data = x->data(); framework::Tensor cpu_attr_tensor; if (!platform::is_cpu_place(x->place())) { @@ -32,7 +33,8 @@ inline std::vector GetDataFromTensor(const framework::Tensor* x) { data = cpu_attr_tensor.data(); } vec_new_data = std::vector(data, data + x->numel()); - } else if (x->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(x->dtype()) == + framework::proto::VarType::INT64) { auto* data = x->data(); framework::Tensor cpu_attr_tensor; if (!platform::is_cpu_place(x->place())) { @@ -45,7 +47,7 @@ inline std::vector GetDataFromTensor(const framework::Tensor* x) { } else { PADDLE_THROW(platform::errors::InvalidArgument( "The dtype of Tensor must be int32 or int64, but received: %s", - x->type())); + framework::TransToProtoVarType(x->dtype()))); } return vec_new_data; } @@ -63,7 +65,8 @@ inline std::vector GetDataFromTensorList( "is [%s]", tensor->dims())); - if (tensor->type() == framework::proto::VarType::INT32) { + if 
(framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT32) { if (!platform::is_cpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -71,7 +74,8 @@ inline std::vector GetDataFromTensorList( } else { vec_new_data.push_back(static_cast(*tensor->data())); } - } else if (tensor->type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor->dtype()) == + framework::proto::VarType::INT64) { if (!platform::is_cpu_place(tensor->place())) { framework::Tensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); @@ -84,7 +88,7 @@ inline std::vector GetDataFromTensorList( PADDLE_THROW(platform::errors::InvalidArgument( "The dtype of Tensor in list must be int32 or int64, but received: " "%s", - tensor->type())); + tensor->dtype())); } } return vec_new_data; diff --git a/paddle/fluid/operators/where_index_op_npu.cc b/paddle/fluid/operators/where_index_op_npu.cc index 226f1461ed439..8c7c4e25d692e 100644 --- a/paddle/fluid/operators/where_index_op_npu.cc +++ b/paddle/fluid/operators/where_index_op_npu.cc @@ -37,7 +37,8 @@ class NPUWhereIndexKernel : public framework::OpKernel { // Run Cast and ReduceSum to get 0 dim of Out Tensor booled_cond; - if (condition->type() != framework::proto::VarType::BOOL) { + if (framework::TransToProtoVarType(condition->dtype()) != + framework::proto::VarType::BOOL) { auto bool_type = ConvertToNpuDtype(framework::proto::VarType::BOOL); booled_cond.mutable_data(dims, place); const auto& booled_runner = diff --git a/paddle/fluid/operators/where_op_npu.cc b/paddle/fluid/operators/where_op_npu.cc index bb77f35daceb0..d4294393daa34 100755 --- a/paddle/fluid/operators/where_op_npu.cc +++ b/paddle/fluid/operators/where_op_npu.cc @@ -58,7 +58,7 @@ class WhereGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - framework::Tensor tensor_zeros(dout_t->type()); + framework::Tensor tensor_zeros(dout_t->dtype()); tensor_zeros.mutable_data(dout_t->dims(), ctx.GetPlace()); const auto& runner = NpuOpRunner("ZerosLike", {*dout_t}, {tensor_zeros}, {}); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h index a75759e2ae079..9b02d11b3f97f 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h @@ -23,6 +23,7 @@ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -152,8 +153,9 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::cudnnSetTensorNdDescriptor( - desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(), - dims_with_group.data(), strides.data())); + desc_.get(), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), + dims_with_group.size(), dims_with_group.data(), strides.data())); } void set(const std::vector& dims, const cudnnTensorFormat_t format, @@ -171,7 +173,8 @@ class TensorDescriptor { void set(const Tensor& tensor, const cudnnTensorFormat_t format) { auto dims = framework::vectorize(tensor.dims()); - auto dtype = ToCudnnDataType(tensor.type()); + auto dtype = + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())); set(dims, format, dtype); } @@ -217,7 +220,8 @@ class FilterDescriptor { void set(const Tensor& tensor, 
const cudnnTensorFormat_t format, const int groups = 1) { auto dims = framework::vectorize(tensor.dims()); - auto dtype = ToCudnnDataType(tensor.type()); + auto dtype = + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())); set(dims, format, dtype, groups); } diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h index f4533c859fcd3..7acf713d68809 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h @@ -142,7 +142,8 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); @@ -164,7 +165,8 @@ class TensorDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); @@ -209,7 +211,8 @@ class FilterDescriptor { dims_with_group[1] = dims_with_group[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetTensorDescriptor( - (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.type()), + (miopenTensorDescriptor_t)(desc_.get()), + ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())), static_cast(dims_with_group.size()), const_cast(dims_with_group.data()), const_cast(strides.data()))); diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index 8bedca5c0b82f..93c5cc90762ca 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -297,7 +297,9 @@ void Compiler::LowerConstants(const Graph* graph, const Scope* scope) { tensor->Resize(ddim); auto const_data = std::unique_ptr(); - popart::TensorInfo tensor_info(VarType2PopartType(tensor->type()), shape); + popart::TensorInfo tensor_info( + VarType2PopartType(framework::TransToProtoVarType(tensor->dtype())), + shape); const_data.reset(new popart::ConstVoidData(tensor->data(), tensor_info)); popart::TensorId result = builder_->aiOnnxOpset11().constant(*const_data); SetIpuIndexStage(result, op_desc); @@ -325,7 +327,8 @@ void Compiler::LowerWeights(const Graph* graph, const Scope* scope) { auto var = scope->FindVar(var_name); if (var) { auto tensor = var->Get(); - auto dtype = VarType2PopartType(tensor.type()); + auto dtype = VarType2PopartType( + framework::TransToProtoVarType(tensor.dtype())); auto shape = std::vector(); for (size_t i = 0; i < tensor.dims().size(); ++i) { shape.push_back(tensor.dims().at(i)); diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index df2f1c786e947..5da785c1b6f4a 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -112,7 +112,8 @@ void Executor::Run(const std::vector &inputs, tensor->Resize(framework::make_ddim(output_shape)); auto fetch_dtype = fetch_info.dataType(); auto paddle_type = 
PopartType2VarType(fetch_dtype); - tensor->mutable_data(ctx.GetPlace(), paddle_type); + tensor->mutable_data(ctx.GetPlace(), + framework::TransToPtenDataType(paddle_type)); anchor_wrappers.emplace(tensor_id, PaddleIArray(tensor)); popart_anchors.emplace(tensor_id, anchor_wrappers.at(tensor_id)); } diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h index 3cd7115b5ebf3..1b8d25acff473 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.h +++ b/paddle/fluid/platform/device/ipu/ipu_utils.h @@ -89,7 +89,8 @@ bool GetBoolEnv(std::string str); template std::unique_ptr> Tensor2IArray(const Tensor& tensor) { - auto dtype = VarType2PopartType(tensor.type()); + auto dtype = + VarType2PopartType(framework::TransToProtoVarType(tensor.dtype())); auto shape = std::vector(); for (size_t i = 0; i < tensor.dims().size(); ++i) { shape.push_back(tensor.dims().at(i)); diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.cc b/paddle/fluid/platform/device/npu/npu_op_runner.cc index b9c92e07612b0..14bad6f7479c3 100644 --- a/paddle/fluid/platform/device/npu/npu_op_runner.cc +++ b/paddle/fluid/platform/device/npu/npu_op_runner.cc @@ -373,7 +373,8 @@ std::vector &NpuOpRunner::GetOutputBuffers() { aclTensorDesc *NpuOpRunner::CreateTensorDesc(Tensor tensor, aclMemType mem_type) { - auto dtype = ConvertToNpuDtype(tensor.type()); + auto dtype = + ConvertToNpuDtype(framework::TransToProtoVarType(tensor.dtype())); auto format = ConvertToNpuFormat(tensor.layout()); auto dims = framework::vectorize(tensor.dims()); int size = dims.size(); @@ -459,12 +460,14 @@ void NpuOpRunner::TypeAdapter( for (size_t i = 0; i < input_type.size(); ++i) { bool cast_input = - (input_type[i] == -1 || input_type[i] != inputs[i].type()); + (input_type[i] == -1 || + input_type[i] != framework::TransToProtoVarType(inputs[i].dtype())); if (!cast_input) { tmp_inputs[i].ShareDataWith(inputs[i]); } else { tmp_inputs[i].Resize(inputs[i].dims()); - tmp_inputs[i].mutable_data(dev_ctx.GetPlace(), input_type[i]); + tmp_inputs[i].mutable_data(dev_ctx.GetPlace(), + framework::TransToPtenDataType(input_type[i])); const auto &cast_runner = NpuOpRunner( "Cast", {inputs[i]}, {tmp_inputs[i]}, @@ -474,12 +477,14 @@ void NpuOpRunner::TypeAdapter( } for (size_t i = 0; i < output_type.size(); ++i) { bool cast_output = - (output_type[i] == -1 || output_type[i] != outputs[i].type()); + (output_type[i] == -1 || + output_type[i] != framework::TransToProtoVarType(outputs[i].dtype())); if (!cast_output) { tmp_outputs[i].ShareDataWith(outputs[i]); } else { tmp_outputs[i].Resize(outputs[i].dims()); - tmp_outputs[i].mutable_data(dev_ctx.GetPlace(), output_type[i]); + tmp_outputs[i].mutable_data( + dev_ctx.GetPlace(), framework::TransToPtenDataType(output_type[i])); } } @@ -487,12 +492,14 @@ void NpuOpRunner::TypeAdapter( for (size_t i = 0; i < output_type.size(); ++i) { bool cast_output = - (output_type[i] == -1 || output_type[i] != outputs[i].type()); + (output_type[i] == -1 || + output_type[i] != framework::TransToProtoVarType(outputs[i].dtype())); if (cast_output) { const auto &cast_runner = NpuOpRunner( "Cast", {tmp_outputs[i]}, {outputs[i]}, {{"dst_type", - static_cast(ConvertToNpuDtype(outputs[i].type()))}}); + static_cast(ConvertToNpuDtype( + framework::TransToProtoVarType(outputs[i].dtype())))}}); cast_runner.Run(dev_ctx.stream()); } } diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.h b/paddle/fluid/platform/device/npu/npu_op_runner.h index 39d2b9ffa9b1d..7583e00343575 100644 --- 
a/paddle/fluid/platform/device/npu/npu_op_runner.h +++ b/paddle/fluid/platform/device/npu/npu_op_runner.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include "acl/acl.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device/npu/enforce_npu.h" @@ -143,7 +144,7 @@ void FillNpuTensorWithConstant(Tensor *tensor, T val) { int numel = tensor->numel(); if (numel == 1) { - Tensor npu_pinned_tensor(tensor->type()); + Tensor npu_pinned_tensor(tensor->dtype()); platform::NPUPinnedPlace npu_pinned_place; auto npu_pinned_ptr = npu_pinned_tensor.mutable_data({1}, npu_pinned_place); diff --git a/paddle/fluid/platform/lodtensor_printer.cc b/paddle/fluid/platform/lodtensor_printer.cc index ca5d156802851..159e7354279ad 100644 --- a/paddle/fluid/platform/lodtensor_printer.cc +++ b/paddle/fluid/platform/lodtensor_printer.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/platform/lodtensor_printer.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" namespace pten { @@ -50,29 +51,29 @@ void PrintVar(framework::Scope* scope, const std::string& var_name, *sstream << print_info; -#define PrintTensorCallback(cpp_type, proto_type) \ - do { \ - if (tensor->type() == proto_type) { \ - *sstream << "["; \ - const cpp_type* data = nullptr; \ - framework::LoDTensor cpu_tensor; \ - if (is_cpu_place(tensor->place())) { \ - data = tensor->data(); \ - } else { \ - platform::CPUPlace cpu_place; \ - paddle::framework::TensorCopy(*tensor, cpu_place, &cpu_tensor); \ - data = cpu_tensor.data(); \ - } \ - auto element_num = tensor->numel(); \ - *sstream << element_num << "]:["; \ - if (element_num > 0) { \ - *sstream << data[0]; \ - for (int j = 1; j < element_num; ++j) { \ - *sstream << " " << data[j]; \ - } \ - } \ - *sstream << "]"; \ - } \ +#define PrintTensorCallback(cpp_type, proto_type) \ + do { \ + if (framework::TransToProtoVarType(tensor->dtype()) == proto_type) { \ + *sstream << "["; \ + const cpp_type* data = nullptr; \ + framework::LoDTensor cpu_tensor; \ + if (is_cpu_place(tensor->place())) { \ + data = tensor->data(); \ + } else { \ + platform::CPUPlace cpu_place; \ + paddle::framework::TensorCopy(*tensor, cpu_place, &cpu_tensor); \ + data = cpu_tensor.data(); \ + } \ + auto element_num = tensor->numel(); \ + *sstream << element_num << "]:["; \ + if (element_num > 0) { \ + *sstream << data[0]; \ + for (int j = 1; j < element_num; ++j) { \ + *sstream << " " << data[j]; \ + } \ + } \ + *sstream << "]"; \ + } \ } while (0) _ForEachDataType_(PrintTensorCallback); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 69fe068a9b4a8..8d706263f029c 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1050,7 +1050,8 @@ class ReorderMKLDNNHandler { const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt); - auto dst_data = output->mutable_data(place, vtype_dst_, dst_md.get_size()); + auto dst_data = output->mutable_data( + place, framework::TransToPtenDataType(vtype_dst_), dst_md.get_size()); return std::make_shared(dst_md, engine_, dst_data); } @@ -1058,7 +1059,8 @@ class ReorderMKLDNNHandler { framework::Tensor* output, const std::vector& dims, const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims, dtype_dst_, fmt); - auto dst_data = output->mutable_data(place, 
vtype_dst_, dst_md.get_size()); + auto dst_data = output->mutable_data( + place, framework::TransToPtenDataType(vtype_dst_), dst_md.get_size()); return std::make_shared(dst_md, engine_, dst_data); } diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 1db7339664ace..f4e5df800dada 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -79,7 +80,8 @@ void EmptyEagerTensorInitializer( std::shared_ptr dense_tensor = std::make_shared( pten::make_intrusive(place), - pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims)); + pten::DenseTensorMeta(paddle::framework::TransToPtenDataType(dtype), + ddims)); dense_tensor->mutable_data(place); self->tensor.set_impl(dense_tensor); } else { diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 7aa810acc28cd..a32edae2ad23c 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -59,7 +60,7 @@ class EagerNumpyAllocation : public pten::Allocation { explicit EagerNumpyAllocation(PyObject* numpy_data, pten::DataType dtype) : Allocation( static_cast(pybind11::detail::array_proxy(numpy_data)->data), - pten::DataTypeSize(dtype) * PyArray_Size_(numpy_data), + framework::DataTypeSize(dtype) * PyArray_Size_(numpy_data), paddle::platform::CPUPlace()), arr_(numpy_data) { PADDLE_ENFORCE_NOT_NULL(arr_, platform::errors::InvalidArgument( diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 511cc10d645ea..6865379036608 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" @@ -51,7 +52,7 @@ static PyObject* eager_tensor_method_numpy(TensorObject* self, PyObject* args, self->tensor.name())); auto tensor_dims = self->tensor.shape(); auto numpy_dtype = TensorDtype2NumpyDtype(self->tensor.type()); - auto sizeof_dtype = pten::DataTypeSize(self->tensor.type()); + auto sizeof_dtype = paddle::framework::DataTypeSize(self->tensor.type()); Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank]; Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank]; size_t numel = 1; @@ -83,7 +84,8 @@ static PyObject* eager_tensor_method_numpy(TensorObject* self, PyObject* args, paddle::platform::GpuMemcpySync( pybind11::detail::array_proxy(array)->data, dense_tensor->data(), - pten::DataTypeSize(dense_tensor->dtype()) * dense_tensor->numel(), + paddle::framework::DataTypeSize(dense_tensor->dtype()) * + dense_tensor->numel(), cudaMemcpyDeviceToHost); #endif } else { diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 9fb0941fa361b..942df3f69dac0 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -172,7 +172,8 @@ PyObject* eager_tensor_properties_get_place_str(TensorObject* self, PyObject* eager_tensor_properties_get_dtype(TensorObject* self, void* closure) { EAGER_SYNC_TRY - return ToPyObject(pten::TransToProtoVarType(self->tensor.type())); + return ToPyObject( + paddle::framework::TransToProtoVarType(self->tensor.type())); EAGER_CATCH_AND_THROW_RETURN_NULL } diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 61b8f1fe010d9..c84a71d8aaa00 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -30,6 +30,7 @@ limitations under the License. */ #include #include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope_guard.h" #include "paddle/fluid/imperative/all_reduce.h" #include "paddle/fluid/imperative/amp_auto_cast.h" @@ -187,7 +188,7 @@ static void InitVarBaseAndTensor( "Place should be one of " "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace/MLUPlace")); } - self->SetDataType(tensor->type()); + self->SetDataType(framework::TransToProtoVarType(tensor->dtype())); } static void InitVarBaseFromNumpyWithKwargs(imperative::VarBase *self, @@ -242,7 +243,7 @@ static void InitVarBaseFromNumpyWithArg(imperative::VarBase *self, } SetTensorFromPyArray
(tensor, array, place, zero_copy); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor->type()); + self->SetDataType(framework::TransToProtoVarType(tensor->dtype())); } static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, @@ -264,7 +265,7 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor.type()); + self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); auto *new_tensor = self->MutableVar()->GetMutable(); // Same place,share data directly if (place == tensor.place()) { @@ -289,7 +290,7 @@ static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); - self->SetDataType(tensor.type()); + self->SetDataType(framework::TransToProtoVarType(tensor.dtype())); auto *new_tensor = self->MutableVar()->GetMutable(); // Same place,share data directly if (platform::is_same_place(place, tensor.place())) { @@ -433,9 +434,11 @@ static Py_ssize_t GetSliceIndexFromTensor( const std::shared_ptr &tensor_index) { const auto &tensor = tensor_index->Var().Get(); if (tensor.numel() == 1) { - if (tensor.type() == framework::proto::VarType::INT32) { + if (framework::TransToProtoVarType(tensor.dtype()) == + framework::proto::VarType::INT32) { return static_cast(operators::GetValue(&tensor)); - } else if (tensor.type() == framework::proto::VarType::INT64) { + } else if (framework::TransToProtoVarType(tensor.dtype()) == + framework::proto::VarType::INT64) { return static_cast(operators::GetValue(&tensor)); } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -787,7 +790,7 @@ void BindImperative(py::module *m_ptr) { platform::CPUPlace(), true); // 3. allocate shared memory void *data_ptr = t.data(); - size_t data_size = t.numel() * framework::SizeOfType(t.type()); + size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation(data_size); // 4. maintain mmap fd set & backup ipc_name @@ -822,7 +825,7 @@ void BindImperative(py::module *m_ptr) { platform::CPUPlace(), true); // 3. allocate shared memory void *data_ptr = t.data(); - size_t data_size = t.numel() * framework::SizeOfType(t.type()); + size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation(data_size); // 4. maintain mmap fd set & backup ipc_name @@ -1314,7 +1317,7 @@ void BindImperative(py::module *m_ptr) { } } #define TENSOR_TO_PY_SCALAR(T, proto_type) \ - if (tensor.type() == proto_type) { \ + if (framework::TransToProtoVarType(tensor.dtype()) == proto_type) { \ std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(proto_type); \ T b = TensorGetElement(tensor, offset); \ return py::array(py::dtype(py_dtype_str.c_str()), {}, {}, \ @@ -1324,8 +1327,7 @@ void BindImperative(py::module *m_ptr) { _ForEachDataType_(TENSOR_TO_PY_SCALAR); #undef TENSOR_TO_PY_SCALAR PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported tensor data type: %s", - framework::DataTypeToString(tensor.type()))); + "Unsupported tensor data type: %s", tensor.dtype())); }, py::return_value_policy::copy) .def("_inplace_version", @@ -1852,7 +1854,9 @@ void BindImperative(py::module *m_ptr) { auto *t = self->MutableVar()->GetMutable(); // 2. 
allocate shared memory void *data_ptr = t->data(); - size_t data_size = t->numel() * framework::SizeOfType(t->type()); + size_t data_size = + t->numel() * framework::SizeOfType( + framework::TransToProtoVarType(t->dtype())); auto shared_writer_holder = memory::allocation::AllocateMemoryMapWriterAllocation( data_size); @@ -1887,7 +1891,9 @@ void BindImperative(py::module *m_ptr) { // Register the cpu memory as the cuda host memory const auto &data_numel = self_tensor->numel(); const size_t &need_allocate_size = - data_numel * framework::SizeOfType(self_tensor->type()); + data_numel * + framework::SizeOfType( + framework::TransToProtoVarType(self_tensor->dtype())); void *data_ptr = self_tensor->data(); auto result = cudaHostRegister(data_ptr, need_allocate_size, cudaHostRegisterDefault); @@ -1907,7 +1913,7 @@ void BindImperative(py::module *m_ptr) { std::make_shared( cuda_device_pointer, need_allocate_size, platform::CUDAPlace(device_id)); - self_tensor->ResetHolderWithType(holder, self_tensor->type()); + self_tensor->ResetHolderWithType(holder, self_tensor->dtype()); }, py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC( Returns self tensor with the UVA(unified virtual addressing). diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 8f20daf337d2c..959e34afe3da6 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -28,6 +28,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type_transform.h" @@ -458,7 +459,9 @@ static void inline CreateVariableIfNotExit( var = const_cast(&scope)->Var(para_name); auto *tensor_temp = var->GetMutable(); tensor_temp->Resize(framework::make_ddim(var_desc.GetShape())); - tensor_temp->mutable_data(exe->GetPlace(), var_desc.GetDataType()); + tensor_temp->mutable_data( + exe->GetPlace(), + framework::TransToPtenDataType(var_desc.GetDataType())); } } } else { @@ -817,33 +820,39 @@ PYBIND11_MODULE(core_noavx, m) { .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::XPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CUDAPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::CUDAPinnedPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::MLUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, 
framework::TransToPtenDataType(type))); }) .def("_clear", &framework::Tensor::clear) .def("_mutable_data", [](framework::Tensor &self, paddle::platform::NPUPlace &place, paddle::framework::proto::VarType::Type type) { - return reinterpret_cast(self.mutable_data(place, type)); + return reinterpret_cast(self.mutable_data( + place, framework::TransToPtenDataType(type))); }) .def("_copy_from", &TensorCopyFrom, py::arg("tensor"), py::arg("place"), py::arg("batch_size") = -1) @@ -941,7 +950,10 @@ PYBIND11_MODULE(core_noavx, m) { .def("_set_double_element", TensorSetElement) .def("_get_double_element", TensorGetElement) .def("_place", [](framework::Tensor &self) { return self.place(); }) - .def("_dtype", [](framework::Tensor &self) { return self.type(); }) + .def("_dtype", + [](framework::Tensor &self) { + return framework::TransToProtoVarType(self.type()); + }) .def("_layout", [](framework::Tensor &self) { return DataLayoutToString(self.layout()); @@ -1207,7 +1219,7 @@ PYBIND11_MODULE(core_noavx, m) { // 4. Rebuild Tensor tensor.ResetHolderWithType( shared_reader_holder, - static_cast(t[2].cast())); + static_cast(t[2].cast())); tensor.Resize(make_ddim(t[3].cast>())); tensor.set_lod(t[4].cast()); diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index d4fa1b2c89abf..1e6a30a3ba783 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -353,7 +353,8 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { auto new_var = std::make_shared(act_name); new_var->SetPersistable(false); new_var->SetType(framework::proto::VarType::LOD_TENSOR); - new_var->SetDataType(lod_tensor.type()); + new_var->SetDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); auto *tensor = new_var->MutableVar()->GetMutable(); *tensor = std::move(lod_tensor); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index dac84abfb9857..9a11c5946f318 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -31,6 +31,7 @@ limitations under the License. 
*/ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" #endif +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/profiler.h" @@ -308,7 +309,7 @@ void SetTensorFromPyArrayT( if (zero_copy) { auto holder = std::make_shared>(array); auto type = framework::ToDataType(std::type_index(typeid(T))); - self->ResetHolderWithType(holder, type); + self->ResetHolderWithType(holder, framework::TransToPtenDataType(type)); } else { auto dst = self->mutable_data(place); std::memcpy(dst, array.data(), array.nbytes()); @@ -332,7 +333,7 @@ void SetTensorFromPyArrayT( if (zero_copy) { auto holder = std::make_shared>(array); auto type = framework::ToDataType(std::type_index(typeid(T))); - self->ResetHolderWithType(holder, type); + self->ResetHolderWithType(holder, framework::TransToPtenDataType(type)); } else { auto dst = self->mutable_data(place); std::memcpy(dst, array.data(), array.nbytes()); @@ -477,7 +478,8 @@ void SetUVATensorFromPyArray( std::make_shared( cuda_device_pointer, need_allocate_size, platform::CUDAPlace(device_id)); - self_tensor->ResetHolderWithType(holder, data_type); + self_tensor->ResetHolderWithType(holder, + framework::TransToPtenDataType(data_type)); #endif } @@ -577,21 +579,21 @@ inline framework::Tensor *_getTensor(const framework::Tensor &self, output->Resize(ddim); auto place = self.place(); if (platform::is_cpu_place(place)) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } else if (platform::is_xpu_place(place)) { #ifdef PADDLE_WITH_XPU - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); #endif } else if (platform::is_mlu_place(place)) { #ifdef PADDLE_WITH_MLU - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); #endif } else { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (platform::is_cuda_pinned_place(place)) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } else if ((platform::is_gpu_place(place))) { - output->mutable_data(place, self.type()); + output->mutable_data(place, self.dtype()); } #endif } @@ -677,7 +679,7 @@ inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self, inline framework::Tensor *_sliceTensor(const framework::Tensor &self, py::object obj, int dim) { - auto src_type = self.type(); + auto src_type = framework::TransToProtoVarType(self.dtype()); switch (src_type) { case framework::proto::VarType::FP16: return _sliceAndConcat(self, obj, dim); @@ -756,7 +758,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, bool is_npu_tensor = platform::is_npu_place(tensor.place()); bool is_mlu_tensor = platform::is_mlu_place(tensor.place()); const auto &tensor_dims = tensor.dims(); - auto tensor_dtype = tensor.type(); + auto tensor_dtype = framework::TransToProtoVarType(tensor.dtype()); size_t sizeof_dtype = framework::SizeOfType(tensor_dtype); std::vector py_dims(tensor_dims.size()); @@ -771,7 +773,8 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, const void *tensor_buf_ptr = tensor.data(); - std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(tensor.type()); + std::string py_dtype_str = details::TensorDTypeToPyDTypeStr( + framework::TransToProtoVarType(tensor.dtype())); if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor && !is_mlu_tensor) { if 
(!need_deep_copy) { diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc index 230787c1b35cd..ea0e9d8cd3e0a 100644 --- a/paddle/pten/api/lib/utils/tensor_utils.cc +++ b/paddle/pten/api/lib/utils/tensor_utils.cc @@ -91,11 +91,11 @@ pten::ScalarArray MakePtenScalarArrayFromVarList( vector_data.reserve(variable_list.size()); for (auto* var : variable_list) { - paddle::framework::proto::VarType::Type data_type; + paddle::experimental::DataType data_type; if (var->IsType()) { const auto& tensor = var->Get(); - data_type = tensor.type(); - if (data_type == paddle::framework::proto::VarType::INT64) { + data_type = tensor.dtype(); + if (data_type == paddle::experimental::DataType::INT64) { const auto& tensor = var->Get(); if (tensor.IsInitialized() && !platform::is_same_place(tensor.place(), expected_place)) { @@ -105,7 +105,7 @@ pten::ScalarArray MakePtenScalarArrayFromVarList( } else { vector_data.push_back(*tensor.data()); } - } else if (data_type == paddle::framework::proto::VarType::INT32) { + } else if (data_type == paddle::experimental::DataType::INT32) { const auto& tensor = var->Get(); if (tensor.IsInitialized() && !platform::is_same_place(tensor.place(), expected_place)) { diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index 46cd2a96414de..6beaf5797d692 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -8,7 +8,7 @@ if(WITH_GPU) endif() cc_library(pten_enforce INTERFACE SRCS enforce.cc DEPS ${pten_enforce_deps}) -cc_library(kernel_factory SRCS kernel_factory.cc DEPS pten_enforce convert_utils) +cc_library(kernel_factory SRCS kernel_factory.cc DEPS pten_enforce fluid_convert_utils) cc_library(kernel_context SRCS kernel_context.cc DEPS pten_enforce pten_context) cc_library(ddim SRCS ddim.cc DEPS pten_enforce) @@ -17,7 +17,7 @@ cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce mixed_vector) cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce mixed_vector) cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base) -cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS convert_utils tensor_meta tensor_base) +cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) diff --git a/paddle/pten/core/compat/convert_utils.cc b/paddle/pten/core/compat/convert_utils.cc index 191f2c6c4b50a..2cd286677462d 100644 --- a/paddle/pten/core/compat/convert_utils.cc +++ b/paddle/pten/core/compat/convert_utils.cc @@ -21,77 +21,6 @@ limitations under the License. 
*/ namespace pten { -paddle::experimental::DataType TransToPtenDataType( - const paddle::framework::proto::VarType::Type& dtype) { - // Set the order of case branches according to the frequency with - // the data type is used - switch (dtype) { - case paddle::framework::proto::VarType::FP32: - return DataType::FLOAT32; - case paddle::framework::proto::VarType::FP64: - return DataType::FLOAT64; - case paddle::framework::proto::VarType::INT64: - return DataType::INT64; - case paddle::framework::proto::VarType::INT32: - return DataType::INT32; - case paddle::framework::proto::VarType::INT8: - return DataType::INT8; - case paddle::framework::proto::VarType::UINT8: - return DataType::UINT8; - case paddle::framework::proto::VarType::INT16: - return DataType::INT16; - case paddle::framework::proto::VarType::COMPLEX64: - return DataType::COMPLEX64; - case paddle::framework::proto::VarType::COMPLEX128: - return DataType::COMPLEX128; - case paddle::framework::proto::VarType::FP16: - return DataType::FLOAT16; - case paddle::framework::proto::VarType::BF16: - return DataType::BFLOAT16; - case paddle::framework::proto::VarType::BOOL: - return DataType::BOOL; - default: - return DataType::UNDEFINED; - } -} - -paddle::framework::proto::VarType::Type TransToProtoVarType( - const paddle::experimental::DataType& dtype) { - // Set the order of case branches according to the frequency with - // the data type is used - switch (dtype) { - case DataType::FLOAT32: - return paddle::framework::proto::VarType::FP32; - case DataType::FLOAT64: - return paddle::framework::proto::VarType::FP64; - case DataType::INT64: - return paddle::framework::proto::VarType::INT64; - case DataType::INT32: - return paddle::framework::proto::VarType::INT32; - case DataType::INT8: - return paddle::framework::proto::VarType::INT8; - case DataType::UINT8: - return paddle::framework::proto::VarType::UINT8; - case DataType::INT16: - return paddle::framework::proto::VarType::INT16; - case DataType::COMPLEX64: - return paddle::framework::proto::VarType::COMPLEX64; - case DataType::COMPLEX128: - return paddle::framework::proto::VarType::COMPLEX128; - case DataType::FLOAT16: - return paddle::framework::proto::VarType::FP16; - case DataType::BFLOAT16: - return paddle::framework::proto::VarType::BF16; - case DataType::BOOL: - return paddle::framework::proto::VarType::BOOL; - default: - PADDLE_THROW(pten::errors::Unimplemented( - "Unsupported data type `%s` when casting it into " - "paddle data type.", - dtype)); - } -} - Backend TransToPtenBackend(const pten::Place& place) { if (place.GetType() == pten::AllocationType::CPU) { return Backend::CPU; @@ -135,97 +64,6 @@ pten::Place TransToPtenPlace(const Backend& backend, bool set_device_id) { } } -size_t DataTypeSize(DataType dtype) { - switch (dtype) { - case DataType::UNDEFINED: - return 0; - case DataType::BOOL: - return sizeof(bool); - case DataType::INT8: - return sizeof(int8_t); - case DataType::UINT8: - return sizeof(uint8_t); - case DataType::INT16: - return sizeof(int16_t); - case DataType::INT32: - return sizeof(int); - case DataType::INT64: - return sizeof(int64_t); - case DataType::FLOAT16: - return sizeof(pten::dtype::float16); - case DataType::FLOAT32: - return sizeof(float); - case DataType::FLOAT64: - return sizeof(double); - case DataType::COMPLEX64: - return sizeof(pten::dtype::complex); - case DataType::COMPLEX128: - return sizeof(pten::dtype::complex); - default: - return 0; - } -} - -DataType String2DataType(const std::string& str) { - if (str == "bool") { - return 
DataType::BOOL; - } else if (str == "float16") { - return DataType::FLOAT16; - } else if (str == "float32") { - return DataType::FLOAT32; - } else if (str == "float64") { - return DataType::FLOAT64; - } else if (str == "int8") { - return DataType::INT8; - } else if (str == "int16") { - return DataType::INT16; - } else if (str == "int32") { - return DataType::INT32; - } else if (str == "int64") { - return DataType::INT64; - } else if (str == "uint8") { - return DataType::UINT8; - } else if (str == "complex64") { - return DataType::COMPLEX64; - } else if (str == "complex128") { - return DataType::COMPLEX128; - } else { - return DataType::UNDEFINED; - } -} - -std::string DataType2String(DataType dtype) { - switch (dtype) { - case DataType::BOOL: - return "bool"; - case DataType::INT8: - return "int8"; - case DataType::UINT8: - return "uint8"; - case DataType::INT16: - return "int16"; - case DataType::INT32: - return "int32"; - case DataType::INT64: - return "int64"; - case DataType::FLOAT16: - return "float16"; - case DataType::FLOAT32: - return "float32"; - case DataType::FLOAT64: - return "float64"; - case DataType::COMPLEX64: - return "complex64"; - case DataType::COMPLEX128: - return "complex128"; - default: - PADDLE_THROW(pten::errors::InvalidArgument( - "Unknow pten::DataType, the int value = %d.", - static_cast(dtype))); - return ""; - } -} - std::string TransToPtenKernelName(const std::string& fluid_op_name) { return OpUtilsMap::Instance().GetBaseKernelName(fluid_op_name); } diff --git a/paddle/pten/core/compat/convert_utils.h b/paddle/pten/core/compat/convert_utils.h index 3e6672e8c941c..0db71b577de51 100644 --- a/paddle/pten/core/compat/convert_utils.h +++ b/paddle/pten/core/compat/convert_utils.h @@ -27,20 +27,10 @@ limitations under the License. */ namespace pten { -// dtype convert function still relay on fluid proto -DataType TransToPtenDataType( - const paddle::framework::proto::VarType::Type& dtype); -paddle::framework::proto::VarType::Type TransToProtoVarType( - const DataType& dtype); - std::string TransToPtenKernelName(const std::string& fluid_op_name); const std::string& TransToFluidOpName(const std::string& pten_kernel_name); Backend TransToPtenBackend(const pten::Place& place); pten::Place TransToPtenPlace(const Backend& backend, bool set_device_id = true); -size_t DataTypeSize(DataType dtype); -DataType String2DataType(const std::string& str); -std::string DataType2String(DataType dtype); - } // namespace pten diff --git a/paddle/pten/core/dense_tensor.inl b/paddle/pten/core/dense_tensor.inl index 4df6b2556a8af..735783a844d2e 100644 --- a/paddle/pten/core/dense_tensor.inl +++ b/paddle/pten/core/dense_tensor.inl @@ -37,7 +37,7 @@ private: /* @jim19930609: Remove dependency on protobuf after Tensor Unification. */ -explicit DenseTensor(paddle::framework::proto::VarType::Type dtype); +explicit DenseTensor(paddle::experimental::DataType dtype); /// \brief Use existing storage space to create dense tensor. This interface /// can be used to deliberately create an uninitialized dense tensor. 
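For illustration only (not part of the patch; the helper, tensor, and place names below are hypothetical), the dense_tensor.inl change above means fluid-side code that still holds a proto::VarType::Type now converts it at the boundary before touching DenseTensor, assuming the convert_utils.h helpers introduced by this PR:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/pten/core/dense_tensor.h"

// Hypothetical helper: allocate storage for a legacy proto type by converting
// it to the pten DataType that DenseTensor::mutable_data() now expects.
void AllocateForProtoType(pten::DenseTensor* tensor,
                          const paddle::platform::Place& place,
                          paddle::framework::proto::VarType::Type proto_type) {
  tensor->mutable_data(place,
                       paddle::framework::TransToPtenDataType(proto_type));
}
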
@@ -63,23 +63,19 @@ T* mutable_data(const DDim& dims, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, size_t requested_size = 0); void* mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, const pten::Stream& stream); /* @jim19930609: Remove dependency on protobuf after Tensor Unification. */ -paddle::framework::proto::VarType::Type type() const; - -/* @jim19930609: Remove dependency on protobuf after Tensor Unification. -*/ -paddle::framework::proto::VarType::Type saved_type() const; +paddle::experimental::DataType type() const; // memory size returns the holding memory size in byte. size_t memory_size() const; @@ -115,9 +111,9 @@ std::shared_ptr MoveMemoryHolder() { void ResetHolder(const std::shared_ptr& holder); void ResetHolderWithType(const std::shared_ptr& holder, - paddle::framework::proto::VarType::Type type); + paddle::experimental::DataType type); -void set_type(paddle::framework::proto::VarType::Type type); +void set_type(paddle::experimental::DataType type); InplaceVersion& InplaceVersionCounter() { return *inplace_version_counter_; diff --git a/paddle/pten/core/dense_tensor_impl.cc b/paddle/pten/core/dense_tensor_impl.cc index 80c66c057158b..dfde62618d01c 100644 --- a/paddle/pten/core/dense_tensor_impl.cc +++ b/paddle/pten/core/dense_tensor_impl.cc @@ -30,8 +30,8 @@ DenseTensor::DenseTensor() { meta_.offset = 0; } -DenseTensor::DenseTensor(paddle::framework::proto::VarType::Type dtype) { - meta_.dtype = TransToPtenDataType(dtype); +DenseTensor::DenseTensor(paddle::experimental::DataType dtype) { + meta_.dtype = dtype; meta_.offset = 0; } @@ -64,13 +64,7 @@ const paddle::platform::Place& DenseTensor::place() const { return holder_->place(); } -paddle::framework::proto::VarType::Type DenseTensor::type() const { - return TransToProtoVarType(meta_.dtype); -} - -paddle::framework::proto::VarType::Type DenseTensor::saved_type() const { - return TransToProtoVarType(meta_.dtype); -} +paddle::experimental::DataType DenseTensor::type() const { return meta_.dtype; } void DenseTensor::set_layout(const paddle::framework::DataLayout layout) { meta_.layout = layout; @@ -96,17 +90,17 @@ void DenseTensor::ResetHolder(const std::shared_ptr& holder) { void DenseTensor::ResetHolderWithType( const std::shared_ptr& holder, - paddle::framework::proto::VarType::Type type) { + paddle::experimental::DataType type) { set_type(type); ResetHolder(holder); } -void DenseTensor::set_type(paddle::framework::proto::VarType::Type type) { - meta_.dtype = TransToPtenDataType(type); +void DenseTensor::set_type(paddle::experimental::DataType type) { + meta_.dtype = type; } void* DenseTensor::mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, size_t requested_size) { set_type(type); PADDLE_ENFORCE_GE( @@ -139,7 +133,7 @@ void* DenseTensor::mutable_data(const paddle::platform::Place& place, } void* DenseTensor::mutable_data(const paddle::platform::Place& place, - paddle::framework::proto::VarType::Type type, + paddle::experimental::DataType type, const pten::Stream& stream) { set_type(type); PADDLE_ENFORCE_GE( @@ -183,8 +177,10 @@ template inline T* DenseTensor::mutable_data(const paddle::platform::Place& place, size_t requested_size) { 
static_assert(std::is_pod::value, "T must be POD"); - return reinterpret_cast(mutable_data( - place, paddle::framework::DataTypeTrait::DataType(), requested_size)); + return reinterpret_cast( + mutable_data(place, + paddle::experimental::CppTypeToDataType::Type(), + requested_size)); } void DenseTensor::ShareBufferWith(const DenseTensor& tensor) { diff --git a/paddle/pten/kernels/cpu/copy_kernel.cc b/paddle/pten/kernels/cpu/copy_kernel.cc index 6a5d33b91aa76..8df53c064a127 100644 --- a/paddle/pten/kernels/cpu/copy_kernel.cc +++ b/paddle/pten/kernels/cpu/copy_kernel.cc @@ -47,8 +47,7 @@ void Copy(const Context& dev_ctx, VLOG(4) << "src:" << src_ptr << ", dst:" << dst_ptr; CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_cpu_place(src_place)) { paddle::memory::Copy(src_place, dst_ptr, src_place, src_ptr, size); diff --git a/paddle/pten/kernels/funcs/math_function.cc b/paddle/pten/kernels/funcs/math_function.cc index 550ec23c18f3a..780068e0381aa 100644 --- a/paddle/pten/kernels/funcs/math_function.cc +++ b/paddle/pten/kernels/funcs/math_function.cc @@ -220,8 +220,9 @@ void set_constant_with_place( const paddle::platform::DeviceContext& context, paddle::framework::Tensor* tensor, float value) { - paddle::framework::VisitDataType(tensor->type(), - TensorSetConstantCPU(tensor, value)); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantCPU(tensor, value)); } template <> @@ -238,8 +239,9 @@ void set_constant_with_place( const paddle::platform::DeviceContext& context, paddle::framework::Tensor* tensor, float value) { - paddle::framework::VisitDataType(tensor->type(), - TensorSetConstantCPU(tensor, value)); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantCPU(tensor, value)); } struct TensorSetConstantWithPlace : public boost::static_visitor { diff --git a/paddle/pten/kernels/funcs/math_function.cu b/paddle/pten/kernels/funcs/math_function.cu index 76bc5f806d3e8..f7cee12b2dfd4 100644 --- a/paddle/pten/kernels/funcs/math_function.cu +++ b/paddle/pten/kernels/funcs/math_function.cu @@ -227,7 +227,8 @@ void set_constant_with_place( paddle::framework::Tensor* tensor, float value) { paddle::framework::VisitDataType( - tensor->type(), TensorSetConstantGPU(context, tensor, value)); + paddle::framework::TransToProtoVarType(tensor->type()), + TensorSetConstantGPU(context, tensor, value)); } template diff --git a/paddle/pten/kernels/funcs/math_function.h b/paddle/pten/kernels/funcs/math_function.h index 8208c0afb0675..73b9dd00bc640 100644 --- a/paddle/pten/kernels/funcs/math_function.h +++ b/paddle/pten/kernels/funcs/math_function.h @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/pten/kernels/funcs/math_function_impl.h b/paddle/pten/kernels/funcs/math_function_impl.h index 286f694ce51a9..19f3082c05cc2 100644 --- a/paddle/pten/kernels/funcs/math_function_impl.h +++ b/paddle/pten/kernels/funcs/math_function_impl.h @@ -31,7 +31,7 @@ void SetConstant::operator()( if (paddle::platform::is_xpu_place(context.GetPlace())) { xpu_place = true; paddle::framework::VisitDataType( - tensor->type(), + paddle::framework::TransToProtoVarType(tensor->type()), TensorSetConstantXPU(tensor, num, context.GetPlace())); } #endif diff --git a/paddle/pten/kernels/gpu/copy_kernel.cu b/paddle/pten/kernels/gpu/copy_kernel.cu index d48a9fb1d774f..5d6229f09f015 100644 --- a/paddle/pten/kernels/gpu/copy_kernel.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -54,8 +54,7 @@ void Copy(const Context& dev_ctx, CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_cuda_pinned_place(src_place) && // NOLINT paddle::platform::is_cuda_pinned_place(dst_place)) { diff --git a/paddle/pten/kernels/xpu/copy_kernel.cc b/paddle/pten/kernels/xpu/copy_kernel.cc index f27705ca112b3..fa0331f24ddb7 100644 --- a/paddle/pten/kernels/xpu/copy_kernel.cc +++ b/paddle/pten/kernels/xpu/copy_kernel.cc @@ -45,8 +45,7 @@ void Copy(const Context& dev_ctx, << dst_place; dst->ResizeAndAllocate(src.dims()); CHECK(dst->layout() == src.layout()); - auto size = src.numel() * - paddle::framework::SizeOfType(TransToProtoVarType(src.dtype())); + auto size = src.numel() * paddle::experimental::SizeOf(src.dtype()); if (paddle::platform::is_xpu_place(src_place) && // NOLINT paddle::platform::is_cpu_place(dst_place)) { diff --git a/paddle/pten/tests/core/test_convert_utils.cc b/paddle/pten/tests/core/test_convert_utils.cc index cc7ac6e7e59ca..977e49aafb9bd 100644 --- a/paddle/pten/tests/core/test_convert_utils.cc +++ b/paddle/pten/tests/core/test_convert_utils.cc @@ -13,59 +13,66 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "gtest/gtest.h" -#include "paddle/pten/core/compat/convert_utils.h" +#include "paddle/fluid/framework/convert_utils.h" namespace pten { namespace tests { TEST(ConvertUtils, DataType) { // enum -> proto - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT64) == paddle::framework::proto::VarType::FP64); - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT32) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT32) == paddle::framework::proto::VarType::FP32); - CHECK(pten::TransToProtoVarType(paddle::DataType::UINT8) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::UINT8) == paddle::framework::proto::VarType::UINT8); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT8) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT8) == paddle::framework::proto::VarType::INT8); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT32) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT32) == paddle::framework::proto::VarType::INT32); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT64) == paddle::framework::proto::VarType::INT64); - CHECK(pten::TransToProtoVarType(paddle::DataType::INT16) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::INT16) == paddle::framework::proto::VarType::INT16); - CHECK(pten::TransToProtoVarType(paddle::DataType::BOOL) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::BOOL) == paddle::framework::proto::VarType::BOOL); - CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX64) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::COMPLEX64) == paddle::framework::proto::VarType::COMPLEX64); - CHECK(pten::TransToProtoVarType(paddle::DataType::COMPLEX128) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::COMPLEX128) == paddle::framework::proto::VarType::COMPLEX128); - CHECK(pten::TransToProtoVarType(paddle::DataType::FLOAT16) == + CHECK(paddle::framework::TransToProtoVarType(paddle::DataType::FLOAT16) == paddle::framework::proto::VarType::FP16); // proto -> enum - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP64) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP64) == paddle::DataType::FLOAT64); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP32) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP32) == paddle::DataType::FLOAT32); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT64) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT64) == paddle::DataType::INT64); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT32) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT32) == paddle::DataType::INT32); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT8) == - paddle::DataType::INT8); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::UINT8) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT8) == paddle::DataType::INT8); + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::UINT8) == paddle::DataType::UINT8); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::INT16) == + 
CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::INT16) == paddle::DataType::INT16); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::BOOL) == - paddle::DataType::BOOL); - CHECK( - pten::TransToPtenDataType(paddle::framework::proto::VarType::COMPLEX64) == - paddle::DataType::COMPLEX64); - CHECK(pten::TransToPtenDataType( + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::BOOL) == paddle::DataType::BOOL); + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::COMPLEX64) == + paddle::DataType::COMPLEX64); + CHECK(paddle::framework::TransToPtenDataType( paddle::framework::proto::VarType::COMPLEX128) == paddle::DataType::COMPLEX128); - CHECK(pten::TransToPtenDataType(paddle::framework::proto::VarType::FP16) == + CHECK(paddle::framework::TransToPtenDataType( + paddle::framework::proto::VarType::FP16) == paddle::DataType::FLOAT16); } diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h index f919340303dde..9cec48f9c99b5 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device_context.h" @@ -78,7 +79,8 @@ struct ReluFunctor { inline void ReluForward(const paddle::framework::Tensor &x, paddle::framework::Tensor *y) { custom_raw_op::ReluFunctor functor(x, y); - paddle::framework::VisitDataType(x.type(), functor); + paddle::framework::VisitDataType( + paddle::framework::TransToProtoVarType(x.dtype()), functor); } } // namespace custom_raw_op
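
To summarize the recurring rewrite applied throughout this patch, here is a minimal sketch (illustrative names only; it assumes the paddle/fluid/framework/convert_utils.h helpers added in this PR) of how call sites move from proto-typed queries to pten dtypes:

#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/tensor.h"

// Hypothetical function showing the two most common replacements in this PR.
void MigrationSketch(const paddle::framework::Tensor& t) {
  // Old: auto proto_type = t.type();  // returned proto::VarType::Type
  // New: keep pten::DataType internally and convert to proto only where a
  //      fluid API (e.g. VisitDataType, ConvertToNpuDtype) still requires it.
  auto proto_type = paddle::framework::TransToProtoVarType(t.dtype());

  // Old: size_t bytes = t.numel() * framework::SizeOfType(t.type());
  // New: the byte size comes straight from the pten dtype.
  size_t bytes = t.numel() * paddle::framework::DataTypeSize(t.dtype());

  (void)proto_type;
  (void)bytes;
}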