From 65420271609b8cce860ec8034569292db7d13d71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 7 Dec 2022 14:36:34 +0800 Subject: [PATCH] [phi::DenseTensor] Replace Tensor with phi::DenseTensor (#48682) --- .../fluid/imperative/gradient_accumulator.cc | 10 +- paddle/fluid/operators/abs_op_mlu.cc | 4 +- paddle/fluid/operators/abs_op_npu.cc | 2 - paddle/fluid/operators/activation_op_mlu.cc | 4 +- paddle/fluid/operators/activation_op_npu.cc | 218 +++++++++--------- paddle/fluid/operators/affine_grid_op.cc | 2 - .../amp/alloc_float_status_op_npu.cc | 2 - .../amp/check_finite_and_unscale_op_mlu.cc | 8 +- .../amp/check_finite_and_unscale_op_npu.cc | 10 +- .../check_finite_and_unscale_op_npu_test.cc | 4 +- .../amp/clear_float_status_op_npu.cc | 4 +- .../operators/amp/get_float_status_op_npu.cc | 4 +- .../amp/update_loss_scaling_op_npu.cc | 6 +- paddle/fluid/operators/arg_max_op_npu.cc | 3 +- paddle/fluid/operators/arg_min_op_npu.cc | 1 - paddle/fluid/operators/argsort_op_npu.cc | 35 ++- paddle/fluid/operators/attention_lstm_op.cc | 33 +-- paddle/fluid/operators/attention_lstm_op.h | 2 - paddle/fluid/operators/batch_norm_op.cc | 18 +- paddle/fluid/operators/batch_norm_op.cu | 1 - paddle/fluid/operators/batch_norm_op.h | 1 - paddle/fluid/operators/batch_norm_op_mlu.cc | 12 +- paddle/fluid/operators/batch_norm_op_npu.cc | 2 +- paddle/fluid/operators/bce_loss_op_mlu.cc | 2 - paddle/fluid/operators/bce_loss_op_npu.cc | 2 - paddle/fluid/operators/cast_op.cc | 2 +- paddle/fluid/operators/cast_op_mlu.cc | 2 - paddle/fluid/operators/cast_op_npu.cc | 2 - paddle/fluid/operators/center_loss_op.h | 3 +- paddle/fluid/operators/clip_by_norm_op.h | 1 - paddle/fluid/operators/clip_by_norm_op_npu.cc | 8 +- paddle/fluid/operators/clip_op_mlu.cc | 8 +- paddle/fluid/operators/clip_op_npu.cc | 10 +- paddle/fluid/operators/coalesce_tensor_op.cc | 2 +- .../operators/collective/c_allreduce_op.h | 5 +- .../c_softmax_with_cross_entropy_op.cu | 20 +- paddle/fluid/operators/concat_op.cc | 1 - paddle/fluid/operators/concat_op_mlu.cc | 4 +- .../operators/controlflow/logical_op_mlu.cc | 2 - .../operators/controlflow/logical_op_npu.cc | 2 - paddle/fluid/operators/conv_op.h | 2 - paddle/fluid/operators/conv_op_mlu.cc | 33 ++- paddle/fluid/operators/conv_op_npu.cc | 29 +-- .../fluid/operators/conv_transpose_op_mlu.cc | 17 +- .../fluid/operators/conv_transpose_op_npu.cc | 9 +- paddle/fluid/operators/copy_cross_scope_op.cc | 2 - paddle/fluid/operators/correlation_op.cc | 2 - paddle/fluid/operators/cos_sim_op.h | 6 +- paddle/fluid/operators/crop_op_npu.cc | 6 +- paddle/fluid/operators/cross_entropy_op.h | 6 +- paddle/fluid/operators/ctc_align_op.h | 2 - paddle/fluid/operators/cudnn_lstm_op.cu.cc | 49 ++-- paddle/fluid/operators/cumsum_op_mlu.cc | 4 +- paddle/fluid/operators/cumsum_op_npu.cc | 8 +- paddle/fluid/operators/cvm_op.cc | 2 - paddle/fluid/operators/cvm_op.cu | 1 - paddle/fluid/operators/cvm_op.h | 2 - paddle/fluid/operators/data_norm_op.cc | 13 +- paddle/fluid/operators/data_norm_op.cu | 3 +- .../fluid/operators/deformable_conv_op_mlu.cc | 30 ++- .../operators/deformable_psroi_pooling_op.cu | 1 - .../operators/deformable_psroi_pooling_op.h | 4 +- .../fluid/operators/detection/bbox_util.cu.h | 16 +- .../operators/detection/bipartite_match_op.cc | 4 +- .../fluid/operators/detection/box_clip_op.cu | 1 - .../fluid/operators/detection/box_clip_op.h | 9 +- .../operators/detection/box_coder_op_npu.cc | 135 +++++------ 
.../detection/collect_fpn_proposals_op.cc | 1 - .../detection/collect_fpn_proposals_op.cu | 26 +-- .../detection/density_prior_box_op_npu.cc | 71 +++--- .../detection/generate_mask_labels_op.cc | 58 ++--- .../detection/generate_proposal_labels_op.cc | 62 ++--- .../detection/generate_proposals_op.cc | 42 ++-- .../detection/generate_proposals_op.cu | 40 ++-- .../detection/generate_proposals_v2_op.cc | 2 - .../detection/iou_similarity_op_mlu.cc | 50 ++-- .../detection/iou_similarity_op_npu.cc | 50 ++-- .../detection/locality_aware_nms_op.cc | 10 +- .../operators/detection/matrix_nms_op.cc | 2 - .../operators/detection/multiclass_nms_op.cc | 10 +- .../detection/polygon_box_transform_op.cc | 2 - .../detection/polygon_box_transform_op.cu | 1 - .../operators/detection/prior_box_op_npu.cc | 8 +- .../retinanet_detection_output_op.cc | 47 ++-- .../detection/roi_perspective_transform_op.cc | 14 +- .../detection/rpn_target_assign_op.cc | 138 +++++------ .../detection/sigmoid_focal_loss_op.cu | 21 +- .../detection/sigmoid_focal_loss_op.h | 21 +- .../operators/detection/yolo_box_op_mlu.cc | 2 +- paddle/fluid/operators/detection_map_op.cc | 2 - paddle/fluid/operators/dgc_clip_by_norm_op.h | 2 - paddle/fluid/operators/dropout_op_mlu.cc | 8 +- paddle/fluid/operators/dropout_op_npu.cc | 16 +- .../elementwise/elementwise_add_op_mlu.cc | 1 - .../elementwise/elementwise_add_op_npu.cc | 7 +- .../elementwise/elementwise_div_op.h | 1 - .../elementwise/elementwise_div_op_mlu.cc | 8 +- .../elementwise/elementwise_div_op_npu.cc | 22 +- .../elementwise_floordiv_op_npu.cc | 2 - .../elementwise/elementwise_max_op_npu.cc | 18 +- .../elementwise/elementwise_min_op_mlu.cc | 2 - .../elementwise/elementwise_min_op_npu.cc | 16 +- .../operators/elementwise/elementwise_mlu.h | 6 +- .../elementwise/elementwise_mod_op_npu.cc | 4 +- .../elementwise/elementwise_mul_op.h | 1 - .../elementwise/elementwise_mul_op_mlu.cc | 5 +- .../elementwise/elementwise_mul_op_npu.cc | 9 +- .../operators/elementwise/elementwise_npu.h | 9 +- .../operators/elementwise/elementwise_op.h | 6 - .../elementwise/elementwise_pow_op_mlu.cc | 12 +- .../elementwise/elementwise_pow_op_npu.cc | 32 ++- .../elementwise/elementwise_sub_op_mlu.cc | 2 - .../elementwise/elementwise_sub_op_npu.cc | 8 +- paddle/fluid/operators/expand_as_op.h | 1 - paddle/fluid/operators/expand_as_v2_op.h | 1 - paddle/fluid/operators/expand_as_v2_op_mlu.cc | 2 - paddle/fluid/operators/expand_op.h | 1 - paddle/fluid/operators/expand_v2_op_npu.cc | 9 +- paddle/fluid/operators/eye_op_npu.cc | 2 - paddle/fluid/operators/fc_op.h | 1 - .../fill_constant_batch_size_like_op_npu.cc | 4 +- .../fluid/operators/fill_constant_op_mlu.cc | 3 +- paddle/fluid/operators/filter_by_instag_op.cu | 1 - paddle/fluid/operators/filter_by_instag_op.h | 1 - paddle/fluid/operators/flatten_op.cc | 2 - paddle/fluid/operators/flatten_op_npu.cc | 2 - paddle/fluid/operators/fsp_op.h | 2 - paddle/fluid/operators/fused/attn_gemm.h | 1 - paddle/fluid/operators/fused/attn_gemm_int8.h | 1 - .../fluid/operators/fused/conv_fusion_op.cu | 10 +- .../operators/fused/cudnn_bn_add_relu_test.cc | 171 +++++++------- .../fused/cudnn_bn_stats_finalize.cu.h | 21 +- .../operators/fused/cudnn_norm_conv.cu.h | 21 +- .../operators/fused/cudnn_norm_conv_test.cc | 13 +- .../fused/cudnn_scale_bias_add_relu.cu.h | 39 ++-- paddle/fluid/operators/fused/fmha_ref.h | 2 - .../operators/fused/fused_attention_op.cc | 2 - .../operators/fused/fused_attention_op.cu | 8 +- .../operators/fused/fused_attention_op_xpu.cc | 157 +++++++------ 
...sed_bias_dropout_residual_layer_norm_op.cc | 2 - ...sed_bias_dropout_residual_layer_norm_op.cu | 2 - .../operators/fused/fused_bn_activation_op.cc | 6 +- .../operators/fused/fused_bn_activation_op.cu | 5 +- .../operators/fused/fused_bn_activation_op.h | 1 - .../fused/fused_bn_add_activation_op.cc | 6 +- .../fused/fused_bn_add_activation_op.cu | 5 +- .../fused/fused_bn_add_activation_op.h | 1 - .../fused_embedding_eltwise_layernorm_op.cu | 1 - .../fused/fused_embedding_fc_lstm_op.cc | 21 +- .../fused/fused_embedding_fc_lstm_op.h | 2 - .../fused/fused_embedding_seq_pool_op.h | 5 +- .../operators/fused/fused_feedforward_op.cc | 1 - .../operators/fused/fused_feedforward_op.cu | 2 - .../fused/fused_feedforward_op_xpu.cc | 194 ++++++++-------- .../operators/fused/fused_gate_attention.h | 36 ++- .../fused/fused_gate_attention_op.cc | 1 - .../fused/fused_gate_attention_op.cu | 93 ++++---- .../operators/fused/fused_gemm_epilogue_op.cc | 1 - .../operators/fused/fused_gemm_epilogue_op.cu | 2 - .../fused/fused_gemm_epilogue_op_xpu.cc | 2 - .../fused/fused_multi_transformer_int8_op.cc | 4 +- .../fused/fused_multi_transformer_int8_op.cu | 38 +-- .../fused/fused_multi_transformer_op.cc | 4 +- .../fused/fused_multi_transformer_op.cu | 92 ++++---- .../fused/fused_multi_transformer_op.cu.h | 12 +- .../fused/fusion_conv_inception_op.cu | 1 - paddle/fluid/operators/fused/fusion_gru_op.cc | 29 +-- paddle/fluid/operators/fused/fusion_gru_op.h | 2 - .../fluid/operators/fused/fusion_lstm_op.cc | 21 +- paddle/fluid/operators/fused/fusion_lstm_op.h | 2 - .../fused/fusion_repeated_fc_relu_op.cc | 8 +- .../fused/fusion_repeated_fc_relu_op.h | 2 - .../fused/fusion_seqconv_eltadd_relu_op.cc | 20 +- .../fused/fusion_seqconv_eltadd_relu_op.h | 2 - .../fused/fusion_seqexpand_concat_fc_op.cc | 7 +- .../fused/fusion_seqexpand_concat_fc_op.h | 2 - .../fused/fusion_seqpool_concat_op.h | 2 - .../fused/fusion_seqpool_cvm_concat_op.cc | 3 +- .../fused/fusion_seqpool_cvm_concat_op.h | 2 - .../fused/fusion_squared_mat_sub_op.cc | 12 +- .../fused/fusion_squared_mat_sub_op.h | 2 - .../operators/fused/multihead_matmul_op.cu | 11 +- .../operators/fused/resnet_basic_block_op.cc | 1 - .../fused/resnet_basic_block_op_xpu.cc | 2 - .../fluid/operators/fused/resnet_unit_op.cc | 2 - .../fluid/operators/fused/resnet_unit_op.cu | 133 ++++++----- .../operators/fused/resnet_unit_op_xpu.cc | 101 ++++---- .../operators/fused/skip_layernorm_op.cu | 1 - .../fused/xpu_fused_common_function.h | 15 +- .../fluid/operators/fused/yolo_box_head_op.cu | 1 - .../fluid/operators/fused/yolo_box_post_op.cu | 1 - paddle/fluid/operators/gather_nd_op_mlu.cc | 2 - paddle/fluid/operators/gather_nd_op_npu.cc | 1 - .../fluid/operators/gather_scatter_kernel.cc | 24 +- .../fluid/operators/gather_scatter_kernel.cu | 26 +-- .../fluid/operators/gather_scatter_kernel.h | 50 ++-- paddle/fluid/operators/gaussian_random_op.cc | 2 - .../fluid/operators/gaussian_random_op_mlu.cc | 3 +- .../fluid/operators/gaussian_random_op_npu.cc | 3 +- paddle/fluid/operators/gelu_op_npu.cc | 2 - .../fluid/operators/graph_khop_sampler_op.cu | 2 - .../fluid/operators/graph_khop_sampler_op.h | 2 - paddle/fluid/operators/grid_sampler_op_mlu.cc | 6 +- paddle/fluid/operators/group_norm_op.cc | 15 +- paddle/fluid/operators/group_norm_op.cu | 10 +- paddle/fluid/operators/group_norm_op.h | 1 - paddle/fluid/operators/group_norm_op_npu.cc | 47 ++-- paddle/fluid/operators/gru_op.cc | 14 +- paddle/fluid/operators/gru_op.cu.cc | 9 +- paddle/fluid/operators/gru_op.h | 21 +- 
paddle/fluid/operators/gru_unit_op.h | 6 +- paddle/fluid/operators/huber_loss_op_mlu.cc | 26 +-- paddle/fluid/operators/huber_loss_op_npu.cc | 6 +- paddle/fluid/operators/im2sequence_op.h | 37 ++- paddle/fluid/operators/index_sample_op_npu.cc | 5 +- paddle/fluid/operators/index_select_op.h | 1 - paddle/fluid/operators/index_select_op_npu.cc | 8 +- paddle/fluid/operators/inplace_abn_op.cc | 10 +- paddle/fluid/operators/inplace_abn_op.cu | 6 +- paddle/fluid/operators/inplace_abn_op.h | 1 - paddle/fluid/operators/instance_norm_op.cc | 12 +- paddle/fluid/operators/instance_norm_op.h | 1 - .../fluid/operators/instance_norm_op_npu.cc | 3 +- paddle/fluid/operators/interpolate_op.cu | 16 +- paddle/fluid/operators/interpolate_op.h | 3 +- paddle/fluid/operators/interpolate_op_npu.cc | 5 +- .../fluid/operators/interpolate_v2_op_mlu.cc | 4 +- .../fluid/operators/interpolate_v2_op_npu.cc | 72 +++--- paddle/fluid/operators/jit/benchmark.cc | 35 ++- paddle/fluid/operators/kldiv_loss_op_npu.cc | 4 +- paddle/fluid/operators/label_smooth_op_mlu.cc | 2 - paddle/fluid/operators/label_smooth_op_npu.cc | 10 +- paddle/fluid/operators/layer_norm_kernel.cu.h | 1 - paddle/fluid/operators/layer_norm_op.cc | 7 +- paddle/fluid/operators/layer_norm_op_mlu.cc | 17 +- paddle/fluid/operators/layer_norm_op_npu.cc | 33 ++- paddle/fluid/operators/layout_utils.h | 2 - .../fluid/operators/limit_by_capacity_op.cu | 2 - paddle/fluid/operators/log_loss_op_npu.cc | 2 - paddle/fluid/operators/log_loss_op_xpu.cc | 2 - .../fluid/operators/lookup_table_dequant_op.h | 1 - paddle/fluid/operators/lookup_table_op.h | 1 - paddle/fluid/operators/lookup_table_v2_op.h | 11 +- .../fluid/operators/lookup_table_v2_op_mlu.cc | 4 +- .../fluid/operators/lookup_table_v2_op_npu.cc | 11 +- paddle/fluid/operators/lrn_op.h | 3 - paddle/fluid/operators/lstm_op.h | 40 ++-- paddle/fluid/operators/lstmp_op.h | 47 ++-- .../fluid/operators/masked_select_op_mlu.cc | 14 +- .../fluid/operators/match_matrix_tensor_op.cc | 3 +- .../fluid/operators/match_matrix_tensor_op.h | 1 - paddle/fluid/operators/math/context_project.h | 45 ++-- .../operators/math/eigen_values_vectors.h | 26 +-- paddle/fluid/operators/math/sample_prob.cu | 4 +- paddle/fluid/operators/math/sample_prob.h | 2 - .../fluid/operators/math/sequence_pooling.cc | 5 +- paddle/fluid/operators/math/softmax.cu | 5 +- paddle/fluid/operators/math/tree2col.cu | 9 +- paddle/fluid/operators/matmul_op_mlu.cc | 10 +- paddle/fluid/operators/matmul_op_npu.cc | 27 ++- paddle/fluid/operators/matmul_v2_op_mlu.cc | 10 +- paddle/fluid/operators/matmul_v2_op_npu.cc | 21 +- paddle/fluid/operators/mean_iou_op.h | 7 +- paddle/fluid/operators/mean_op_mlu.cc | 17 +- paddle/fluid/operators/mean_op_npu.cc | 21 +- paddle/fluid/operators/meshgrid_op_mlu.cc | 12 +- .../operators/metrics/accuracy_op_mlu.cc | 14 +- .../operators/metrics/accuracy_op_xpu.cc | 1 - .../operators/metrics/precision_recall_op.h | 1 - .../operators/mkldnn/dequantize_mkldnn_op.cc | 1 - .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 49 ++-- .../operators/mkldnn/quantize_mkldnn_op.cc | 1 - .../operators/mkldnn/requantize_mkldnn_op.cc | 1 - .../operators/mkldnn/reshape_mkldnn_op.cc | 2 +- .../operators/mkldnn/transpose_mkldnn_op.cc | 10 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 128 +++++----- paddle/fluid/operators/mlu/mlu_baseop.h | 13 +- .../fluid/operators/modified_huber_loss_op.cu | 2 - .../fluid/operators/modified_huber_loss_op.h | 1 - paddle/fluid/operators/multi_dot_op.cc | 1 - paddle/fluid/operators/multinomial_op_npu.cc | 2 - 
paddle/fluid/operators/multiplex_op.cc | 2 - paddle/fluid/operators/nce_op.h | 11 +- paddle/fluid/operators/norm_op_npu.cc | 1 - paddle/fluid/operators/norm_utils.cu.h | 29 ++- paddle/fluid/operators/number_count_op.cu | 2 - paddle/fluid/operators/one_hot_op.h | 1 - paddle/fluid/operators/one_hot_op_npu.cc | 3 +- paddle/fluid/operators/one_hot_op_xpu.cc | 2 - paddle/fluid/operators/one_hot_v2_op_mlu.cc | 13 +- paddle/fluid/operators/one_hot_v2_op_npu.cc | 3 +- .../fluid/operators/optimizers/adadelta_op.cc | 2 - .../fluid/operators/optimizers/adagrad_op.cc | 1 - paddle/fluid/operators/optimizers/adam_op.h | 2 - .../fluid/operators/optimizers/adam_op_mlu.cc | 14 +- .../fluid/operators/optimizers/adam_op_npu.cc | 14 +- .../fluid/operators/optimizers/adamax_op.cc | 1 - .../optimizers/decayed_adagrad_op.cc | 1 - paddle/fluid/operators/optimizers/dpsgd_op.cc | 1 - paddle/fluid/operators/optimizers/ftrl_op.cc | 1 - paddle/fluid/operators/optimizers/ftrl_op.h | 1 - .../operators/optimizers/merged_adam_op.cc | 2 - .../optimizers/merged_momentum_op_mlu.cc | 5 +- .../fluid/operators/optimizers/momentum_op.cc | 14 +- .../operators/optimizers/momentum_op_mlu.cc | 4 +- .../optimizers/proximal_adagrad_op.cc | 1 - .../optimizers/proximal_adagrad_op.h | 2 - .../operators/optimizers/proximal_gd_op.cc | 1 - .../operators/optimizers/proximal_gd_op.h | 2 - .../operators/optimizers/rmsprop_op_npu.cc | 14 +- .../optimizers/sparse_momentum_op.cc | 19 +- paddle/fluid/operators/p_norm_op_npu.cc | 21 +- paddle/fluid/operators/pad3d_op_npu.cc | 2 - paddle/fluid/operators/pad_op_npu.cc | 2 - paddle/fluid/operators/partial_concat_op.cc | 1 - paddle/fluid/operators/partial_concat_op.cu | 4 +- paddle/fluid/operators/partial_concat_op.h | 1 - paddle/fluid/operators/partial_sum_op.cc | 1 - paddle/fluid/operators/partial_sum_op.cu | 6 +- paddle/fluid/operators/partial_sum_op.h | 2 - paddle/fluid/operators/pool_op.cc | 8 +- paddle/fluid/operators/pool_op.h | 2 - paddle/fluid/operators/pool_op_mlu.cc | 12 +- .../operators/positive_negative_pair_op.h | 2 - paddle/fluid/operators/prelu_op.cc | 2 - paddle/fluid/operators/prroi_pool_op.cc | 2 - paddle/fluid/operators/prroi_pool_op.cu | 2 - paddle/fluid/operators/pyramid_hash_op.cc | 1 - paddle/fluid/operators/random_routing_op.cu | 2 - paddle/fluid/operators/rank_attention_op.cc | 1 - .../operators/reduce_ops/reduce_any_op_npu.cc | 1 - .../reduce_ops/reduce_any_op_npu_test.cc | 2 - .../operators/reduce_ops/reduce_max_op_mlu.cc | 20 +- .../operators/reduce_ops/reduce_max_op_npu.cc | 15 +- .../reduce_ops/reduce_mean_op_mlu.cc | 2 +- .../reduce_ops/reduce_mean_op_npu.cc | 6 +- .../operators/reduce_ops/reduce_min_op_npu.cc | 5 +- paddle/fluid/operators/reduce_ops/reduce_op.h | 9 +- .../operators/reduce_ops/reduce_op_function.h | 1 - .../reduce_ops/reduce_prod_op_npu.cc | 1 - .../operators/reduce_ops/reduce_sum_op.h | 2 +- .../operators/reduce_ops/reduce_sum_op_mlu.cc | 2 +- .../operators/reduce_ops/reduce_sum_op_npu.cc | 2 +- paddle/fluid/operators/reshape_op.cc | 8 +- paddle/fluid/operators/rnn_op_mlu.cc | 3 +- paddle/fluid/operators/roi_align_op.cc | 2 - paddle/fluid/operators/roi_align_op_mlu.cc | 24 +- paddle/fluid/operators/roi_align_op_npu.cc | 11 +- paddle/fluid/operators/roi_pool_op.cc | 2 - paddle/fluid/operators/sample_logits_op.cu | 15 +- paddle/fluid/operators/sample_logits_op.h | 16 +- paddle/fluid/operators/sampling_id_op.cc | 2 - paddle/fluid/operators/sampling_id_op.h | 2 - paddle/fluid/operators/save_combine_op.cc | 2 - paddle/fluid/operators/scatter_op_mlu.cc | 2 +- 
paddle/fluid/operators/scatter_op_npu.cc | 10 +- paddle/fluid/operators/search_compute.h | 1 - paddle/fluid/operators/seed_op.cc | 1 - paddle/fluid/operators/seed_op.h | 1 - paddle/fluid/operators/set_value_op.cc | 27 ++- paddle/fluid/operators/set_value_op.h | 1 - paddle/fluid/operators/set_value_op_mlu.cc | 8 +- paddle/fluid/operators/set_value_op_npu.cc | 6 +- paddle/fluid/operators/shape_op_mlu.cc | 3 +- paddle/fluid/operators/shape_op_npu.cc | 2 - paddle/fluid/operators/shard_index_op_npu.cc | 11 +- paddle/fluid/operators/shuffle_batch_op.h | 1 - paddle/fluid/operators/shuffle_channel_op.cu | 1 - ...igmoid_cross_entropy_with_logits_op_mlu.cc | 1 - ...igmoid_cross_entropy_with_logits_op_npu.cc | 1 - paddle/fluid/operators/similarity_focus_op.h | 1 - paddle/fluid/operators/slice_op.cc | 6 +- paddle/fluid/operators/slice_op_mlu.cc | 2 - paddle/fluid/operators/slice_op_npu.cc | 3 +- paddle/fluid/operators/smooth_l1_loss_op.h | 7 +- .../fluid/operators/smooth_l1_loss_op_npu.cc | 22 +- .../softmax_with_cross_entropy_op_mlu.cc | 2 - .../softmax_with_cross_entropy_op_npu.cc | 6 +- paddle/fluid/operators/space_to_depth_op.cc | 2 - paddle/fluid/operators/sparse_attention_op.cu | 48 ++-- paddle/fluid/operators/split_op_mlu.cc | 2 - paddle/fluid/operators/split_op_npu.cc | 4 +- .../fluid/operators/squared_l2_distance_op.h | 2 - .../fluid/operators/squared_l2_norm_op_mlu.cc | 6 +- .../fluid/operators/squared_l2_norm_op_npu.cc | 8 +- paddle/fluid/operators/stack_op_mlu.cc | 10 +- paddle/fluid/operators/stack_op_npu.cc | 18 +- paddle/fluid/operators/stft_op.h | 16 +- paddle/fluid/operators/strided_slice_op.cc | 6 +- .../fluid/operators/strided_slice_op_mlu.cc | 9 +- .../fluid/operators/strided_slice_op_npu.cc | 31 ++- paddle/fluid/operators/sum_op_mlu.cc | 3 +- paddle/fluid/operators/sum_op_npu.cc | 3 +- paddle/fluid/operators/svd_helper.h | 38 +-- .../fluid/operators/sync_batch_norm_op_mlu.cc | 29 ++- .../fluid/operators/sync_batch_norm_op_npu.cc | 142 ++++++------ .../fluid/operators/take_along_axis_op_npu.cc | 2 - paddle/fluid/operators/tdm_child_op.h | 1 - paddle/fluid/operators/tdm_sampler_op.h | 1 - .../teacher_student_sigmoid_loss_op.cc | 12 +- .../teacher_student_sigmoid_loss_op.h | 1 - paddle/fluid/operators/temporal_shift_op.h | 1 - paddle/fluid/operators/tile_op_mlu.cc | 2 - paddle/fluid/operators/tile_op_npu.cc | 1 - paddle/fluid/operators/top_k_op.cu | 4 +- paddle/fluid/operators/top_k_op.h | 2 - paddle/fluid/operators/top_k_op_npu.cc | 2 +- paddle/fluid/operators/top_k_op_xpu.cc | 1 - paddle/fluid/operators/tree_conv_op.h | 13 +- .../truncated_gaussian_random_op_npu.cc | 14 +- paddle/fluid/operators/uniform_random_op.cc | 2 +- paddle/fluid/operators/uniform_random_op.cu | 3 +- paddle/fluid/operators/uniform_random_op.h | 1 - .../fluid/operators/uniform_random_op_mlu.cc | 5 +- .../fluid/operators/uniform_random_op_npu.cc | 5 +- paddle/fluid/operators/var_conv_2d_op.cc | 37 +-- paddle/fluid/operators/var_conv_2d_op.h | 1 - paddle/fluid/operators/where_index_op_mlu.cc | 6 +- paddle/fluid/operators/where_index_op_npu.cc | 12 +- 419 files changed, 2450 insertions(+), 2880 deletions(-) diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index b57c874ceebe0..c1838ee201d45 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -644,11 +644,11 @@ void GradientAccumulator::CallGradientHooks() { true, platform::errors::PreconditionNotMet( "Only can call gradient hooks after 
sum gradient completed.")); - PADDLE_ENFORCE_EQ( - HasInnerVar(), - true, - platform::errors::PreconditionNotMet( - "Leaf Tensor's inner var is nullptr when call gradient hook.")); + PADDLE_ENFORCE_EQ(HasInnerVar(), + true, + platform::errors::PreconditionNotMet( + "Leaf Tensor's inner var is nullptr when " + "call gradient hook.")); PADDLE_ENFORCE_EQ( inner_var_->Var().IsInitialized(), true, diff --git a/paddle/fluid/operators/abs_op_mlu.cc b/paddle/fluid/operators/abs_op_mlu.cc index 9afa4c28e0544..e635b9547b4fc 100644 --- a/paddle/fluid/operators/abs_op_mlu.cc +++ b/paddle/fluid/operators/abs_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class AbsMLUKernel : public framework::OpKernel { public: @@ -54,7 +52,7 @@ class AbsGradMLUKernel : public framework::OpKernel { MLUCnnlOpTensorDesc mul_op_desc( CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); - Tensor sign_x; + phi::DenseTensor sign_x; sign_x.mutable_data(x->dims(), ctx.GetPlace()); MLUCnnl::Sign(ctx, diff --git a/paddle/fluid/operators/abs_op_npu.cc b/paddle/fluid/operators/abs_op_npu.cc index a1ca88ae5b572..47c88abb9ede1 100644 --- a/paddle/fluid/operators/abs_op_npu.cc +++ b/paddle/fluid/operators/abs_op_npu.cc @@ -18,8 +18,6 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class AbsNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/activation_op_mlu.cc b/paddle/fluid/operators/activation_op_mlu.cc index 736b398996b45..f26af0a5b9743 100644 --- a/paddle/fluid/operators/activation_op_mlu.cc +++ b/paddle/fluid/operators/activation_op_mlu.cc @@ -21,8 +21,6 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ActivationMLUKernel : public framework::OpKernel { public: @@ -442,7 +440,7 @@ class ReciprocalGradMLUKernel : public framework::OpKernel { auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); - Tensor square_out; + phi::DenseTensor square_out; square_out.Resize(out->dims()); square_out.mutable_data(place); MLUCnnlTensorDesc out_desc(*out); diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 3c6e207b971bc..b471c08d39ce9 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -24,14 +24,12 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PowNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto factor = ctx.Attr("factor"); out->mutable_data(ctx.GetPlace()); @@ -54,9 +52,9 @@ template class PowGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto factor = ctx.Attr("factor"); auto x_dims = x->dims(); @@ -69,7 +67,7 @@ class PowGradNPUKernel : public framework::OpKernel { // NOTE(liym27): dx = dout * factor * x.pow(factor-1) // Step1: Compute x_pow = x.pow(factor-1) - Tensor x_pow(x->type()); + phi::DenseTensor x_pow(x->type()); x_pow.mutable_data(x->dims(), place); const auto& runner_pow = NpuOpRunner( "Power", {*x}, {x_pow}, {{"power", factor - static_cast(1)}}); @@ -78,13 +76,13 @@ class PowGradNPUKernel : public framework::OpKernel { // Step 2: Construct a broadcast factor, which has the same shape with x. // 2.1 Get a factor tensor with shape [1]. - Tensor factor_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor factor_tensor(experimental::DataType::FLOAT32); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, factor); // 2.2 Get the factor which has the shape with x and the same value with // factor. - Tensor factor_bc_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor factor_bc_tensor(experimental::DataType::FLOAT32); factor_bc_tensor.mutable_data(x_dims, place); const auto& runner_bc = NpuOpRunner("FillD", {factor_tensor}, @@ -93,7 +91,7 @@ class PowGradNPUKernel : public framework::OpKernel { runner_bc.Run(stream); // Step 3: Compute x_power_mul_factor = factor * x.pow(factor-1) - Tensor x_power_mul_factor(x->type()); + phi::DenseTensor x_power_mul_factor(x->type()); x_power_mul_factor.mutable_data(x->dims(), place); const auto& runner_mul_1 = NpuOpRunner("Mul", {factor_bc_tensor, x_pow}, {x_power_mul_factor}, {}); @@ -111,8 +109,8 @@ template class ReluNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -134,9 +132,9 @@ template class ReluGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto stream = ctx.template device_context() @@ -153,8 +151,8 @@ template class Relu6NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -176,9 +174,9 @@ 
template class Relu6GradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto stream = ctx.template device_context() @@ -195,9 +193,9 @@ template class SqrtNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -216,8 +214,8 @@ template class LeakyReluNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto alpha = ctx.Attr("alpha"); out->mutable_data(ctx.GetPlace()); @@ -236,9 +234,9 @@ template class LeakyReluGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto alpha = ctx.Attr("alpha"); auto stream = @@ -257,10 +255,10 @@ template class SqrtGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); @@ -279,9 +277,9 @@ template class LogNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -291,12 +289,12 @@ class LogNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor one(x->type()); + phi::DenseTensor one(x->type()); one.mutable_data(x->dims(), place); const auto& runner_one = NpuOpRunner("OnesLike", {*x}, {one}, {}); runner_one.Run(stream); - Tensor sub(x->type()); + phi::DenseTensor sub(x->type()); sub.mutable_data(x->dims(), place); const auto& runner_sub = NpuOpRunner("Sub", {*x, one}, {sub}, {}); runner_sub.Run(stream); @@ -310,10 +308,10 @@ template class LogGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); @@ -331,9 +329,9 @@ template class TanhNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); 
+ auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -352,10 +350,10 @@ template class TanhGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = ctx.Input("Out"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); @@ -374,9 +372,9 @@ template class SquareNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -395,9 +393,9 @@ template class SquareGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto factor = static_cast(2.0); @@ -406,7 +404,7 @@ class SquareGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); // Step 1: Compute x_muls_factor = factor * x - Tensor x_muls_factor(x->type()); + phi::DenseTensor x_muls_factor(x->type()); x_muls_factor.mutable_data(x->dims(), place); const auto& runner_muls_1 = NpuOpRunner("Muls", {*x}, {x_muls_factor}, {{"value", factor}}); @@ -424,9 +422,9 @@ template class SigmoidNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -445,10 +443,10 @@ template class SigmoidGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = ctx.Input("Out"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); @@ -469,8 +467,8 @@ template class SwishNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); float beta = ctx.Attr("beta"); out->mutable_data(ctx.GetPlace()); @@ -494,9 +492,9 @@ template class SwishGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); float beta = ctx.Attr("beta"); dx->mutable_data(ctx.GetPlace()); @@ -504,7 +502,7 @@ class SwishGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor beta_x, 
sigmoid_out, swish_out; + phi::DenseTensor beta_x, sigmoid_out, swish_out; beta_x.mutable_data(x->dims(), ctx.GetPlace()); sigmoid_out.mutable_data(x->dims(), ctx.GetPlace()); swish_out.mutable_data(x->dims(), ctx.GetPlace()); @@ -543,8 +541,8 @@ template class HardSwishNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); float threshold = ctx.Attr("threshold"); float scale = ctx.Attr("scale"); @@ -558,25 +556,25 @@ class HardSwishNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor tensor_offset(x->type()); + phi::DenseTensor tensor_offset(x->type()); tensor_offset.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_offset, static_cast(offset)); - Tensor add_offset_val(x->type()); + phi::DenseTensor add_offset_val(x->type()); add_offset_val.mutable_data(x->dims(), place); const auto& runner_add = NpuOpRunner("AddV2", {*x, tensor_offset}, {add_offset_val}); runner_add.Run(stream); - Tensor tensor_threshold(x->type()); + phi::DenseTensor tensor_threshold(x->type()); tensor_threshold.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_threshold, static_cast(threshold)); - Tensor tensor_zero(x->type()); + phi::DenseTensor tensor_zero(x->type()); tensor_zero.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_zero, static_cast(0.0)); - Tensor clip_val(x->type()); + phi::DenseTensor clip_val(x->type()); clip_val.mutable_data(x->dims(), place); const auto& runner_clip = NpuOpRunner("ClipByValue", @@ -584,10 +582,10 @@ class HardSwishNPUKernel : public framework::OpKernel { {clip_val}); runner_clip.Run(stream); - Tensor tensor_scale_tmp(x->type()); + phi::DenseTensor tensor_scale_tmp(x->type()); tensor_scale_tmp.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_scale_tmp, static_cast(scale)); - Tensor tensor_scale(x->type()); + phi::DenseTensor tensor_scale(x->type()); tensor_scale.mutable_data(x->dims(), place); const auto& runner_fill = NpuOpRunner("FillD", @@ -596,7 +594,7 @@ class HardSwishNPUKernel : public framework::OpKernel { {{"dims", phi::vectorize(x->dims())}}); runner_fill.Run(stream); - Tensor div_val(x->type()); + phi::DenseTensor div_val(x->type()); div_val.mutable_data(x->dims(), place); const auto& runner_div = NpuOpRunner("Div", {clip_val, tensor_scale}, {div_val}); @@ -611,9 +609,9 @@ template class HardSwishGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); float threshold = ctx.Attr("threshold"); float scale = ctx.Attr("scale"); @@ -627,23 +625,23 @@ class HardSwishGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor tensor_offset(x->type()); + phi::DenseTensor tensor_offset(x->type()); tensor_offset.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_offset, static_cast(offset)); - Tensor add_offset_val(x->type()); + phi::DenseTensor add_offset_val(x->type()); add_offset_val.mutable_data(x->dims(), place); const auto& runner_add = NpuOpRunner("AddV2", {*x, tensor_offset}, {add_offset_val}); runner_add.Run(stream); - Tensor 
tmp1(x->type()); + phi::DenseTensor tmp1(x->type()); tmp1.mutable_data(x->dims(), place); const auto& runner_pow1 = NpuOpRunner( "Power", {*x}, {tmp1}, {{"scale", 2.0f}, {"shift", offset}}); runner_pow1.Run(stream); - Tensor tmp2(x->type()); + phi::DenseTensor tmp2(x->type()); tmp2.mutable_data(x->dims(), place); const auto& runner_ht_grad = NpuOpRunner("HardtanhGrad", @@ -652,17 +650,17 @@ class HardSwishGradNPUKernel : public framework::OpKernel { {{"min_val", 0.0f}, {"max_val", threshold}}); runner_ht_grad.Run(stream); - Tensor tmp3(x->type()); + phi::DenseTensor tmp3(x->type()); tmp3.mutable_data(x->dims(), place); const auto& runner_pow2 = NpuOpRunner( "Power", {tmp2}, {tmp3}, {{"scale", 1.0f / scale}, {"shift", 1.0f}}); runner_pow2.Run(stream); - Tensor tensor_threshold_tmp(x->type()); + phi::DenseTensor tensor_threshold_tmp(x->type()); tensor_threshold_tmp.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_threshold_tmp, static_cast(threshold)); - Tensor tensor_threshold(x->type()); + phi::DenseTensor tensor_threshold(x->type()); tensor_threshold.mutable_data(x->dims(), place); const auto& runner_fill = NpuOpRunner("FillD", @@ -671,12 +669,12 @@ class HardSwishGradNPUKernel : public framework::OpKernel { {{"dims", phi::vectorize(x->dims())}}); runner_fill.Run(stream); - Tensor tmp_bool(experimental::DataType::BOOL); + phi::DenseTensor tmp_bool(experimental::DataType::BOOL); tmp_bool.mutable_data(x->dims(), place); const auto& runner_less = NpuOpRunner("Less", {add_offset_val, tensor_threshold}, {tmp_bool}); runner_less.Run(stream); - Tensor tmp4(x->type()); + phi::DenseTensor tmp4(x->type()); tmp4.mutable_data(x->dims(), place); auto dst_dtype = ConvertToNpuDtype(framework::TransToProtoVarType(x->type())); @@ -687,7 +685,7 @@ class HardSwishGradNPUKernel : public framework::OpKernel { {{"dst_type", static_cast(dst_dtype)}}); runner_cast.Run(stream); - Tensor tmp5(x->type()); + phi::DenseTensor tmp5(x->type()); tmp5.mutable_data(x->dims(), place); const auto& runner_sub = NpuOpRunner("Sub", {tmp3, tmp4}, {tmp5}); runner_sub.Run(stream); @@ -701,8 +699,8 @@ template class HardSigmoidNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); float slope = ctx.Attr("slope"); float offset = ctx.Attr("offset"); @@ -724,10 +722,10 @@ template class HardSigmoidGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* out = ctx.Input("Out"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); float slope = ctx.Attr("slope"); float offset = ctx.Attr("offset"); @@ -751,8 +749,8 @@ template class ReciprocalNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); auto stream = @@ -767,9 +765,9 @@ template class ReciprocalGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = 
ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); auto stream = @@ -785,8 +783,8 @@ template class CosNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); @@ -804,14 +802,14 @@ template class CosGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); - Tensor sin_out(x->type()); // Temporary Tensor + phi::DenseTensor sin_out(x->type()); // Temporary phi::DenseTensor sin_out.Resize(x->dims()); sin_out.mutable_data(place); @@ -824,7 +822,7 @@ class CosGradNPUKernel : public framework::OpKernel { const auto& runner_dx = NpuOpRunner("Mul", {*dout, sin_out}, {*dx}, {}); runner_dx.Run(stream); - Tensor tmp(x->type()); // Temporary Tensor + phi::DenseTensor tmp(x->type()); // Temporary phi::DenseTensor tmp.Resize(phi::make_ddim({1, 1})); tmp.mutable_data(place); float factor = -1.; @@ -840,8 +838,8 @@ template class AtanNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); const auto& runner = NpuOpRunner("Atan", {*x}, {*out}, {}); @@ -856,9 +854,9 @@ template class AtanGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); auto stream = @@ -888,9 +886,9 @@ template class ExpGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); auto stream = ctx.template device_context() @@ -904,9 +902,9 @@ template class SinNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index 2d7eb04f1dba0..b23d3670d5e80 100644 --- 
a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -28,8 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class AffineGridOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc index 508c51de723c0..6c78a20e2a51a 100644 --- a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class AllocFloatStatusKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc index 5f5415ffd37d0..543b40ee8fcd0 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel { using MPDType = typename details::MPTypeTrait::Type; @@ -45,7 +43,7 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); // check is_finite or is_nan - Tensor is_finite(found_inf->type()); + phi::DenseTensor is_finite(found_inf->type()); if (i != 0) { is_finite.Resize(phi::make_ddim({1})); is_finite.mutable_data(ctx.GetPlace()); @@ -78,8 +76,8 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel { // out = in/scale, if found_inf = false // But when found_inf is true, the data of Out should not be used. // So, on MLU, we always compute out with in/scale. - Tensor float_x; - Tensor float_out; + phi::DenseTensor float_x; + phi::DenseTensor float_out; if (std::is_same::value) { float_x.Resize(x->dims()); float_out.Resize(out->dims()); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc index 3b6e2ba7184c0..c65b889618f07 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - // NOTE(zhiqiu): The CheckFiniteAndUnscaleNPUKernel is different from CUDA. 
// On NPU, we do not really check the data of input tensors, // but use NPUGetFloatStatus to check whether the nan/inf occurs on device, @@ -47,13 +45,13 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { .stream(); // step1: inverse scale - Tensor const_tensor; + phi::DenseTensor const_tensor; const_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&const_tensor, static_cast(1.0)); // Inverse(1.0/scale) phi::DenseTensor* tmp_inverse_out = const_cast(scale); - Tensor inverse_out(scale->type()); + phi::DenseTensor inverse_out(scale->type()); inverse_out.Resize(scale->dims()); inverse_out.mutable_data(ctx.GetPlace()); const auto& runner_inverse = @@ -62,7 +60,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { tmp_inverse_out = &inverse_out; // NOTE(zhiqiu): - Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({8}, ctx.GetPlace()); // NOTE(zhiqiu): NPUGetFloatStatus updates data on input in-place. // tmp is only placeholder. @@ -73,7 +71,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { {{"message", std::string("check_nan_and_inf")}}); runner_float_status.Run(stream); - Tensor sum; + phi::DenseTensor sum; sum.mutable_data({1}, ctx.GetPlace()); const auto& runner_reduce_sum = NpuOpRunner("ReduceSumD", diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index cca370bf95331..bf7272ba8b878 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -31,8 +31,6 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -using Tensor = phi::DenseTensor; - USE_OP_ITSELF(check_finite_and_unscale); USE_OP_DEVICE_KERNEL(check_finite_and_unscale, NPU); @@ -110,7 +108,7 @@ void Compare(f::Scope *scope, const p::DeviceContext &ctx) { ctx.Wait(); // out found_inf - Tensor found_inf_tensor; + phi::DenseTensor found_inf_tensor; found_inf_tensor.Resize({1}); bool *found_inf_data = found_inf_tensor.mutable_data(paddle::platform::CPUPlace()); diff --git a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc index b5750181139d4..18e68e1ba377f 100644 --- a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ClearFloatStatusKernel : public framework::OpKernel { public: @@ -35,7 +33,7 @@ class ClearFloatStatusKernel : public framework::OpKernel { platform::errors::PreconditionNotMet( "The input(FloatStatus) and Output(FloatStatusOut) " "should be the same.")); - Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({8}, ctx.GetPlace()); const auto& runner = NpuOpRunner("NPUClearFloatStatus", {tmp}, {*float_status_out}); diff --git a/paddle/fluid/operators/amp/get_float_status_op_npu.cc b/paddle/fluid/operators/amp/get_float_status_op_npu.cc index 8befb2df9b835..c6dd6f4e6b968 100644 --- a/paddle/fluid/operators/amp/get_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/get_float_status_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class GetFloatStatusKernel : public framework::OpKernel { public: @@ -35,7 +33,7 @@ class GetFloatStatusKernel : public framework::OpKernel { platform::errors::PreconditionNotMet( "The input(FloatStatus) and Output(FloatStatusOut) " "should be the same.")); - Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({8}, ctx.GetPlace()); auto stream = ctx.template device_context() diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc index b1bfcf8edd672..fb5475610ce15 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc @@ -25,8 +25,6 @@ DECLARE_int32(min_loss_scaling); namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void Update(const platform::NPUDeviceContext& ctx, const std::vector found_inf_vec, @@ -50,7 +48,7 @@ void Update(const platform::NPUDeviceContext& ctx, good_out_tensor->numel() * sizeof(int), stream); // bad_out_data = bad_in_data + 1 - Tensor factor_tensor(bad_out_tensor->dtype()); + phi::DenseTensor factor_tensor(bad_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner( @@ -106,7 +104,7 @@ void Update(const platform::NPUDeviceContext& ctx, stream); // good_out_data = good_in_data + 1 - Tensor factor_tensor(good_out_tensor->dtype()); + phi::DenseTensor factor_tensor(good_out_tensor->dtype()); factor_tensor.mutable_data({1}, place); FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); const auto& runner_p2 = NpuOpRunner( diff --git a/paddle/fluid/operators/arg_max_op_npu.cc b/paddle/fluid/operators/arg_max_op_npu.cc index 6e5048db47ead..175703eaf9fa5 100644 --- a/paddle/fluid/operators/arg_max_op_npu.cc +++ b/paddle/fluid/operators/arg_max_op_npu.cc @@ -18,7 +18,6 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -36,7 +35,7 @@ struct VisitDataArgNPUMaxFunctor { auto dtype = ctx.Attr("dtype"); const bool& flatten = ctx.Attr("flatten"); - Tensor transformed_x(x.type()); + phi::DenseTensor transformed_x(x.type()); transformed_x.ShareDataWith(x); if (flatten) { transformed_x.Resize(phi::make_ddim({x.numel()})); diff --git a/paddle/fluid/operators/arg_min_op_npu.cc b/paddle/fluid/operators/arg_min_op_npu.cc index fe917140b7b9f..5132393cd3727 100644 --- a/paddle/fluid/operators/arg_min_op_npu.cc +++ b/paddle/fluid/operators/arg_min_op_npu.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ArgMinNPUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/argsort_op_npu.cc b/paddle/fluid/operators/argsort_op_npu.cc index 7aedb41c9fde3..d5a42b8228e0a 100644 --- a/paddle/fluid/operators/argsort_op_npu.cc +++ b/paddle/fluid/operators/argsort_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -79,16 +78,16 @@ class ArgsortNPUKernel : public framework::OpKernel { framework::NPUAttributeMap attr = {{"axis", -1}, {"descending", descending}}; - Tensor indices_tmp(experimental::DataType::INT32); + phi::DenseTensor indices_tmp(experimental::DataType::INT32); indices_tmp.Resize(indices->dims()); if (framework::TransToProtoVarType(input->dtype()) == framework::proto::VarType::INT64) { - Tensor input_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor input_fp32(experimental::DataType::FLOAT32); input_fp32.Resize(input->dims()); CastToFP32(ctx, stream, *input, &input_fp32); - Tensor output_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor output_fp32(experimental::DataType::FLOAT32); output_fp32.Resize(output->dims()); if (axis == -1 || axis + 1 == in_dims.size()) { @@ -112,12 +111,12 @@ class ArgsortNPUKernel : public framework::OpKernel { } auto trans_dims = phi::make_ddim(shape); - Tensor trans_input(input_fp32.type()); + phi::DenseTensor trans_input(input_fp32.type()); trans_input.Resize(trans_dims); TranposeNPU(ctx, stream, &perm, input_fp32, &trans_input); - Tensor trans_output(input_fp32.type()); - Tensor trans_indices(experimental::DataType::INT32); + phi::DenseTensor trans_output(input_fp32.type()); + phi::DenseTensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); @@ -150,12 +149,12 @@ class ArgsortNPUKernel : public framework::OpKernel { } auto trans_dims = phi::make_ddim(shape); - Tensor trans_input(input->type()); + phi::DenseTensor trans_input(input->type()); trans_input.Resize(trans_dims); TranposeNPU(ctx, stream, &perm, *input, &trans_input); - Tensor trans_output(input->type()); - Tensor trans_indices(experimental::DataType::INT32); + phi::DenseTensor trans_output(input->type()); + phi::DenseTensor trans_indices(experimental::DataType::INT32); trans_output.mutable_data(trans_dims, ctx.GetPlace()); trans_indices.mutable_data(trans_dims, ctx.GetPlace()); @@ -183,12 +182,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx, phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; - Tensor input_tmp; + phi::DenseTensor input_tmp; input_tmp.ShareDataWith(input); input_tmp.Resize( phi::make_ddim(std::vector{input_height * input_width})); - Tensor indices_tmp; + phi::DenseTensor indices_tmp; indices_tmp.ShareDataWith(indices); indices_tmp.Resize( phi::make_ddim(std::vector{input_height, input_width})); @@ -197,12 +196,12 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx, for (Type i = 0; i < input_height; i++) { indexs_value.push_back(i * input_width); } - Tensor indexs_tmp(indices.type()); + phi::DenseTensor indexs_tmp(indices.type()); framework::TensorFromVector( indexs_value, ctx.device_context(), &indexs_tmp); indexs_tmp.Resize(phi::make_ddim(std::vector{input_height, 1})); - Tensor indices_index(indices.type()); + phi::DenseTensor indices_index(indices.type()); indices_index.mutable_data(indices_tmp.dims(), ctx.GetPlace()); const auto& runner_add = NpuOpRunner("Add", {indices_tmp, indexs_tmp}, {indices_index}, {}); @@ -212,7 +211,7 @@ static void FullAssignNPU(const framework::ExecutionContext& ctx, phi::make_ddim(std::vector{input_height * input_width})); t_out->mutable_data(ctx.GetPlace()); - Tensor 
out_tmp(t_out->type()); + phi::DenseTensor out_tmp(t_out->type()); out_tmp.ShareDataWith(*t_out); const auto& runner = NpuOpRunner("TensorScatterUpdate", @@ -252,15 +251,15 @@ class ArgsortGradNPUKernel : public framework::OpKernel { } auto trans_dims = phi::make_ddim(shape); - Tensor trans_dout(dO->type()); - Tensor trans_ids(indices->type()); + phi::DenseTensor trans_dout(dO->type()); + phi::DenseTensor trans_ids(indices->type()); trans_dout.Resize(trans_dims); trans_ids.Resize(trans_dims); TranposeNPU(ctx, stream, &perm, *dO, &trans_dout); TranposeNPU(ctx, stream, &perm, *indices, &trans_ids); - Tensor trans_dx(dO->type()); + phi::DenseTensor trans_dx(dO->type()); trans_dx.Resize(trans_dims); FullAssignNPU( ctx, stream, trans_dims, trans_dout, trans_ids, &trans_dx); diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index d3ae66b3c02ff..9dff9a05d73ad 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -212,39 +212,41 @@ void AttentionLSTMOpMaker::Make() { "this phi::DenseTensor is a matrix with shape (T X M), where T is the " "total time steps in this mini-batch, M is the dim size of x."); AddInput("C0", - "(Tensor) LSTM C0" + "(phi::DenseTensor) LSTM C0" "This is a tensor with shape (N x D), where N is the batch size, D " "is the gate size." "C0 is necessary because of attention."); AddInput("H0", - "(Tensor, optional) LSTM H0" + "(phi::DenseTensor, optional) LSTM H0" "This is a tensor with shape (N x D), where N is the " "batch size and D is the gate size.") .AsDispensable(); AddInput("AttentionWeight", - "(Tensor) the weights of attention fc. Always relu the fc result." + "(phi::DenseTensor) the weights of attention fc. Always relu the fc " + "result." "The shape is ((M+D) x 1), where M is the dim size of x, D is the " "gate size of LSTM."); AddInput("AttentionBias", - "(Tensor, optional) the bias of attention fc." + "(phi::DenseTensor, optional) the bias of attention fc." "The shape is (1 x 1)") .AsDispensable(); AddInput("AttentionScalar", - "(Tensor, optional) the scalar on the result of attentioned fc. " + "(phi::DenseTensor, optional) the scalar on the result of " + "attentioned fc. " "Always relu the Scalar." "The shape is (1 x 1)") .AsDispensable(); AddInput("AttentionScalarBias", - "(Tensor, optional) the scalar bias of attention fc." + "(phi::DenseTensor, optional) the scalar bias of attention fc." "The shape is (1 x 1)") .AsDispensable(); AddInput("LSTMWeight", - "(Tensor) the combined weight of LSTM" + "(phi::DenseTensor) the combined weight of LSTM" " - The shape is ((D+M) x 4D), where D is the hidden gate size, M " "is the dim size of x" " - Weight = {W_forget, W_input, W_output, W_cell}"); AddInput("LSTMBias", - "(Tensor) the combined bias of LSTM, shape (1x4D)." + "(phi::DenseTensor) the combined bias of LSTM, shape (1x4D)." "Note: we should add the bias of hidden and context accorindg to " "the same gate: " "{B_forget, B_input, B_output, B_cell}"); @@ -257,21 +259,22 @@ void AttentionLSTMOpMaker::Make() { "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. 
" "The shape is (T x D), and lod is the same with the `Input`."); AddOutput("AttentionedX", - "(Tensor) shape is (T x 1), the result after X * AttentionWeight," + "(phi::DenseTensor) shape is (T x 1), the result after X * " + "AttentionWeight," " where T is the total time steps in this mini-batch," " D is the hidden size.") .AsIntermediate(); AddOutput("AttentionFCOut", - "(Tensor) (max_seq_len, 1), compute at each step.") + "(phi::DenseTensor) (max_seq_len, 1), compute at each step.") .AsIntermediate(); AddOutput("LSTMX", - "(Tensor) the input X of LSTM for each step." + "(phi::DenseTensor) the input X of LSTM for each step." "Shape is (1 x M), where M is the x frame size") .AsIntermediate(); - AddOutput( - "LSTMOUT", - "(Tensor) the output of LSTM X(1*(D+M))* weight((D+M)*4D) for each step." - "Shape is (1 x 4D), where M is the x frame size") + AddOutput("LSTMOUT", + "(phi::DenseTensor) the output of LSTM X(1*(D+M))* " + "weight((D+M)*4D) for each step." + "Shape is (1 x 4D), where M is the x frame size") .AsIntermediate(); AddAttr("gate_activation", "(string, default: sigmoid)" diff --git a/paddle/fluid/operators/attention_lstm_op.h b/paddle/fluid/operators/attention_lstm_op.h index 41d7d594df207..0ce83be93c6cc 100644 --- a/paddle/fluid/operators/attention_lstm_op.h +++ b/paddle/fluid/operators/attention_lstm_op.h @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class AttentionLSTMOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index abf177ee9f9f4..b4a24c84bcc45 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -207,7 +207,7 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( framework::OpKernelType BatchNormOp::GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN // Only input require reshaping, weights and @@ -265,7 +265,7 @@ void BatchNormOpMaker::Make() { "The global variance (for training) " "or estimated Variance (for testing)"); AddInput("MomentumTensor", - "(Tensor, optional) If provided, batch_norm will " + "(phi::DenseTensor, optional) If provided, batch_norm will " "use this as momentum, this has a higher priority than " "attr(momentum), the shape of this tensor MUST BE [1].") .AsDispensable(); @@ -380,9 +380,9 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType( PADDLE_THROW( platform::errors::InvalidArgument("can't find gradient variable of Y")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } @@ -397,7 +397,7 @@ framework::OpKernelType BatchNormGradOp::GetExpectedKernelType( framework::OpKernelType BatchNormGradOp::GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN // Only input require reshaping, weights and @@ -522,9 +522,9 @@ framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType( PADDLE_THROW( platform::errors::NotFound("cannot find gradient variable of Y")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = 
&var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index 29c40f1b41ef8..e643efcb8b9f5 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -34,7 +34,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template using CudnnDataType = platform::CudnnDataType; diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index b11deeb49509b..40cdb68329fb2 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -27,7 +27,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template diff --git a/paddle/fluid/operators/batch_norm_op_mlu.cc b/paddle/fluid/operators/batch_norm_op_mlu.cc index a2ed462b0fe7b..77397552333d4 100644 --- a/paddle/fluid/operators/batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/batch_norm_op_mlu.cc @@ -78,8 +78,8 @@ class MLUBatchNormOpKernel : public framework::OpKernel { saved_mean->mutable_data(place); saved_variance->mutable_data(place); - Tensor transformed_x; - Tensor transformed_y; + phi::DenseTensor transformed_x; + phi::DenseTensor transformed_y; const int transformed_dim_size = 4; const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C}; MLUCnnlTensorDesc transformed_desc(transformed_dim_size, @@ -116,7 +116,7 @@ class MLUBatchNormOpKernel : public framework::OpKernel { if (ctx.HasInput("MomentumTensor")) { const auto *mom_tensor = ctx.Input("MomentumTensor"); - Tensor mom_cpu; + phi::DenseTensor mom_cpu; framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; } @@ -226,9 +226,9 @@ class MLUBatchNormGradOpKernel : public framework::OpKernel { : x_dims[x_dims.size() - 1]); const int sample_size = x->numel() / N / C; - Tensor transformed_d_y; - Tensor transformed_x; - Tensor transformed_d_x; + phi::DenseTensor transformed_d_y; + phi::DenseTensor transformed_x; + phi::DenseTensor transformed_d_x; const int transformed_dim_size = 4; const int transformed_shape[transformed_dim_size] = {N, sample_size, 1, C}; diff --git a/paddle/fluid/operators/batch_norm_op_npu.cc b/paddle/fluid/operators/batch_norm_op_npu.cc index 244b76ff86be9..94c2f7297b821 100644 --- a/paddle/fluid/operators/batch_norm_op_npu.cc +++ b/paddle/fluid/operators/batch_norm_op_npu.cc @@ -89,7 +89,7 @@ class NPUBatchNormOpKernel : public framework::OpKernel { // is only used in this training branch if (ctx.HasInput("MomentumTensor")) { const auto *mom_tensor = ctx.Input("MomentumTensor"); - Tensor mom_cpu; + phi::DenseTensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; diff --git a/paddle/fluid/operators/bce_loss_op_mlu.cc b/paddle/fluid/operators/bce_loss_op_mlu.cc index 99fd402424e7c..6541de153d4be 100644 --- a/paddle/fluid/operators/bce_loss_op_mlu.cc +++ b/paddle/fluid/operators/bce_loss_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. 
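The batch_norm hunks above show the recurring shape of this change inside GetExpectedKernelType: a gradient variable that may hold a dense tensor is queried and fetched with the type written out as phi::DenseTensor instead of the removed Tensor alias. A minimal sketch of that lookup, with a hypothetical operator name and the template arguments (elided in the hunks as quoted here) reconstructed:

    framework::OpKernelType HypotheticalGradOp::GetExpectedKernelType(
        const framework::ExecutionContext &ctx) const {
      const auto *var = ctx.InputVar(framework::GradVarName("Y"));
      PADDLE_ENFORCE_NOT_NULL(
          var,
          platform::errors::NotFound("cannot find gradient variable of Y"));
      // Alias-free spelling: query and fetch the dense tensor directly.
      const phi::DenseTensor *t = nullptr;
      if (var->IsType<phi::DenseTensor>()) {
        t = &var->Get<phi::DenseTensor>();
      }
      PADDLE_ENFORCE_NOT_NULL(
          t, platform::errors::NotFound("gradient variable of Y is empty"));
      return framework::OpKernelType(
          framework::TransToProtoVarType(t->dtype()), ctx.GetPlace());
    }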
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class BCELossMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/bce_loss_op_npu.cc b/paddle/fluid/operators/bce_loss_op_npu.cc index c6b2d12ac535e..5918bee19453c 100644 --- a/paddle/fluid/operators/bce_loss_op_npu.cc +++ b/paddle/fluid/operators/bce_loss_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class BCELossNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index 5505d3b4e3250..10b25fc478744 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -40,7 +40,7 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { Cast Operator. This Operator casts the input tensor to another data type and -returns the Output Tensor. It's meaningless if the output dtype equals +returns the Output phi::DenseTensor. It's meaningless if the output dtype equals the input dtype, but it's fine if you do so. )DOC"); diff --git a/paddle/fluid/operators/cast_op_mlu.cc b/paddle/fluid/operators/cast_op_mlu.cc index 7e85702eee4b1..cb0bc659fbb0f 100644 --- a/paddle/fluid/operators/cast_op_mlu.cc +++ b/paddle/fluid/operators/cast_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CastMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/cast_op_npu.cc b/paddle/fluid/operators/cast_op_npu.cc index 9c430fc0ffe30..0e2775efd1328 100644 --- a/paddle/fluid/operators/cast_op_npu.cc +++ b/paddle/fluid/operators/cast_op_npu.cc @@ -32,8 +32,6 @@ static std::map {framework::proto::VarType::FP64, ACL_DOUBLE}, }; -using Tensor = phi::DenseTensor; - template class CastNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/center_loss_op.h b/paddle/fluid/operators/center_loss_op.h index 989a27f552118..36fe957102bfb 100644 --- a/paddle/fluid/operators/center_loss_op.h +++ b/paddle/fluid/operators/center_loss_op.h @@ -26,7 +26,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template @@ -81,7 +80,7 @@ class CenterLossKernel : public framework::OpKernel { auto loss_data = out_loss->mutable_data(ctx.GetPlace()); - Tensor centers_diffacc; // used to accumulate all diff + phi::DenseTensor centers_diffacc; // used to accumulate all diff auto centers_diffacc_data = centers_diffacc.mutable_data(centers_dim, ctx.GetPlace()); int numel = centers_diffacc.numel(); diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index 21658be577ebd..f54e323eefb44 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; // using SelectedRows = phi::SelectedRows; template class NPUClipByNormKernel : public framework::OpKernel { public: @@ -48,7 +46,7 @@ class NPUClipByNormKernel : public framework::OpKernel { "Input(X) of ClipByNormOp should not be null. 
" "Please check if it is created correctly.")); - Tensor square_sum(input->type()); + phi::DenseTensor square_sum(input->type()); square_sum.mutable_data(framework::DDim({1}), place); const auto& x_dims = input->dims(); std::vector axis; @@ -62,12 +60,12 @@ class NPUClipByNormKernel : public framework::OpKernel { {{"axis", axis}, {"keep_dims", false}}); square_sum_runner.Run(stream); - Tensor x_norm(input->type()); + phi::DenseTensor x_norm(input->type()); x_norm.mutable_data(framework::DDim({1}), place); const auto& x_norm_runner = NpuOpRunner("Sqrt", {square_sum}, {x_norm}, {}); x_norm_runner.Run(stream); - Tensor x_norm_t; + phi::DenseTensor x_norm_t; framework::TensorCopySync(x_norm, platform::CPUPlace(), &x_norm_t); auto x_norm_v = static_cast(*x_norm_t.data()); if (x_norm_v <= max_norm) { diff --git a/paddle/fluid/operators/clip_op_mlu.cc b/paddle/fluid/operators/clip_op_mlu.cc index daced778a95dc..f84a493d6d399 100644 --- a/paddle/fluid/operators/clip_op_mlu.cc +++ b/paddle/fluid/operators/clip_op_mlu.cc @@ -29,7 +29,7 @@ class ClipMLUKernel : public framework::OpKernel { auto max = static_cast(ctx.Attr("max")); if (ctx.HasInput("Min")) { - Tensor min_cpu; + phi::DenseTensor min_cpu; auto* min_tensor = ctx.Input("Min"); auto* min_data = min_tensor->data(); if (platform::is_mlu_place(min_tensor->place())) { @@ -41,7 +41,7 @@ class ClipMLUKernel : public framework::OpKernel { } if (ctx.HasInput("Max")) { - Tensor max_cpu; + phi::DenseTensor max_cpu; auto* max_tensor = ctx.Input("Max"); auto* max_data = max_tensor->data(); if (platform::is_mlu_place(max_tensor->place())) { @@ -80,7 +80,7 @@ class ClipGradMLUKernel : public framework::OpKernel { auto min_val = ctx.Attr("min"); if (min_tensor) { - Tensor min_data; + phi::DenseTensor min_data; framework::TensorCopy( *min_tensor, platform::CPUPlace(), @@ -91,7 +91,7 @@ class ClipGradMLUKernel : public framework::OpKernel { } auto max_val = ctx.Attr("max"); if (max_tensor) { - Tensor max_data; + phi::DenseTensor max_data; framework::TensorCopy( *max_tensor, platform::CPUPlace(), diff --git a/paddle/fluid/operators/clip_op_npu.cc b/paddle/fluid/operators/clip_op_npu.cc index 19ae23add0e10..82056ab0acb4a 100644 --- a/paddle/fluid/operators/clip_op_npu.cc +++ b/paddle/fluid/operators/clip_op_npu.cc @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ClipNPUKernel : public framework::OpKernel { public: @@ -33,8 +31,8 @@ class ClipNPUKernel : public framework::OpKernel { auto max_tensor = ctx.HasInput("Max") ? 
ctx.Input("Max") : nullptr; - Tensor min_tensor_temp(x->type()); - Tensor max_tensor_temp(x->type()); + phi::DenseTensor min_tensor_temp(x->type()); + phi::DenseTensor max_tensor_temp(x->type()); if (min_tensor == nullptr) { auto min_value = static_cast(ctx.Attr("min")); min_tensor_temp.mutable_data({1}, ctx.GetPlace()); @@ -74,7 +72,7 @@ class ClipGradNPUKernel : public framework::OpKernel { auto min_val = ctx.Attr("min"); if (min_tensor) { - Tensor min_data; + phi::DenseTensor min_data; framework::TensorCopy( *min_tensor, platform::CPUPlace(), @@ -86,7 +84,7 @@ class ClipGradNPUKernel : public framework::OpKernel { auto max_val = ctx.Attr("max"); if (max_tensor) { - Tensor max_data; + phi::DenseTensor max_data; framework::TensorCopy( *max_tensor, platform::CPUPlace(), diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 6bdfe9e8b754f..75e6df4baf82b 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -61,7 +61,7 @@ struct FillConstantVisitor { * = nullptr) const { #ifdef PADDLE_WITH_ASCEND_CL if (platform::is_npu_place(dev_ctx_.GetPlace())) { - Tensor tensor_tmp(framework::TransToPhiDataType(dtype_)); + phi::DenseTensor tensor_tmp(framework::TransToPhiDataType(dtype_)); tensor_tmp.mutable_data({1}, context_.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, static_cast(value_)); diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 8d3af26f0c254..6920d51eb2637 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -151,10 +151,9 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx, aclrtStream stream, const phi::DenseTensor* in) { - using Tensor = phi::DenseTensor; - Tensor out(in->type()); + phi::DenseTensor out(in->type()); - Tensor mean(in->type()); + phi::DenseTensor mean(in->type()); mean.Resize({1}); mean.mutable_data(dev_ctx.GetPlace()); std::vector axes; diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index 0881b702ec0d8..40a0cb196f3bb 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -24,8 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; @@ -126,7 +124,7 @@ struct CSoftmaxWithCrossEntropyFunctor { const int N = phi::funcs::SizeToAxis(axis, logits_dims); const int D = phi::funcs::SizeFromAxis(axis, logits_dims); - Tensor logits_2d, softmax_2d, loss_2d; + phi::DenseTensor logits_2d, softmax_2d, loss_2d; logits_2d.ShareDataWith(*logits).Resize({N, D}); softmax_2d.ShareDataWith(*softmax).Resize({N, D}); loss_2d.ShareDataWith(*loss).Resize({N, 1}); @@ -135,7 +133,7 @@ struct CSoftmaxWithCrossEntropyFunctor { auto eigen_softmax = math::EigenMatrix::From(softmax_2d); // step 1, obtain logit_max - Tensor logits_max; + phi::DenseTensor logits_max; logits_max = ctx.AllocateTmpTensor({N, 1}, dev_ctx); void* logits_max_buff = logits_max.mutable_data(place); @@ -163,7 +161,7 @@ struct CSoftmaxWithCrossEntropyFunctor { .unaryExpr(math::ValueClip()); // step 3, obtain predict target - Tensor predicted_logits; + phi::DenseTensor predicted_logits; predicted_logits = ctx.AllocateTmpTensor({N, 1}, dev_ctx); predicted_logits.mutable_data(place); @@ -215,7 +213,7 @@ struct CSoftmaxWithCrossEntropyFunctor { eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp(); // step 5, obtain sum_exp_logits - Tensor sum_exp_logits; + phi::DenseTensor sum_exp_logits; sum_exp_logits = ctx.AllocateTmpTensor({N, 1}, dev_ctx); void* sum_exp_logits_buff = sum_exp_logits.mutable_data(place); @@ -278,7 +276,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor { const int N = phi::funcs::SizeToAxis(axis, logits_dims); const int D = phi::funcs::SizeFromAxis(axis, logits_dims); - Tensor logits_2d, softmax_2d, loss_2d; + phi::DenseTensor logits_2d, softmax_2d, loss_2d; logits_2d.ShareDataWith(*logits).Resize({N, D}); softmax_2d.ShareDataWith(*softmax).Resize({N, D}); loss_2d.ShareDataWith(*loss).Resize({N, 1}); @@ -287,7 +285,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor { auto eigen_softmax = math::EigenMatrix::From(softmax_2d); // step 1, obtain logit_max - Tensor logits_max; + phi::DenseTensor logits_max; logits_max = ctx.AllocateTmpTensor({N, 1}, dev_ctx); auto eigen_logits_max = math::EigenMatrix::From(logits_max); @@ -309,7 +307,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor { .unaryExpr(math::ValueClip()); // step 3, obtain predict target - Tensor predicted_logits; + phi::DenseTensor predicted_logits; predicted_logits = ctx.AllocateTmpTensor({N, 1}, dev_ctx); predicted_logits.mutable_data(place); @@ -355,7 +353,7 @@ struct CSoftmaxWithCrossEntropyProcessGroupFunctor { eigen_softmax.device(*dev_ctx.eigen_device()) = eigen_softmax.exp(); // step 5, obtain sum_exp_logits - Tensor sum_exp_logits; + phi::DenseTensor sum_exp_logits; sum_exp_logits = ctx.AllocateTmpTensor({N, 1}, dev_ctx); void* sum_exp_logits_buff = sum_exp_logits.mutable_data(place); @@ -405,7 +403,7 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { const int N = phi::funcs::SizeToAxis(axis, sofrmax_dims); const int D = phi::funcs::SizeFromAxis(axis, sofrmax_dims); - Tensor logit_grad_2d; + phi::DenseTensor logit_grad_2d; logit_grad_2d.ShareDataWith(*logit_grad).Resize({N, D}); int blocks = NumBlocks(N * D); diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index ae65930b86ac0..0c6e7b31c9d2e 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -26,7 +26,6 @@ limitations under the License. 
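The softmax-with-cross-entropy hunks above keep the usual 2-D view over logits, softmax, and loss; only the declarations change now that the file-local Tensor alias is gone. A minimal sketch, assuming logits, softmax, loss, and axis are as in the functor above:

    // Flatten to (N, D) around `axis`; ShareDataWith reuses the storage, so
    // only the shape metadata changes.
    const int N = phi::funcs::SizeToAxis(axis, logits->dims());
    const int D = phi::funcs::SizeFromAxis(axis, logits->dims());
    phi::DenseTensor logits_2d, softmax_2d, loss_2d;
    logits_2d.ShareDataWith(*logits).Resize({N, D});
    softmax_2d.ShareDataWith(*softmax).Resize({N, D});
    loss_2d.ShareDataWith(*loss).Resize({N, 1});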
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class ConcatOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index b73460f2057e4..ebfd2895e783b 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -119,7 +119,7 @@ class ConcatGradMLUKernel : public framework::OpKernel { out_grad->dims().size())); // get output tensor that the name is not kEmptyVarName std::vector outputs_vec; - std::vector tmp_outputs_vec; + std::vector tmp_outputs_vec; std::vector output_descs; std::vector descs_vec; for (size_t j = 0; j < outs.size(); ++j) { @@ -129,7 +129,7 @@ class ConcatGradMLUKernel : public framework::OpKernel { output_descs.emplace_back(MLUCnnlTensorDesc(*outs[j])); outputs_vec.push_back(GetBasePtr(outs[j])); } else { - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.mutable_data(ins[j]->dims(), ctx.GetPlace()); tmp_outputs_vec.push_back(tmp_tensor); output_descs.emplace_back(MLUCnnlTensorDesc(*ins[j])); diff --git a/paddle/fluid/operators/controlflow/logical_op_mlu.cc b/paddle/fluid/operators/controlflow/logical_op_mlu.cc index 5e1630447b9de..7f63513af7bac 100644 --- a/paddle/fluid/operators/controlflow/logical_op_mlu.cc +++ b/paddle/fluid/operators/controlflow/logical_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class LogicalMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/controlflow/logical_op_npu.cc b/paddle/fluid/operators/controlflow/logical_op_npu.cc index 7c2c11bbfb40e..38ffa202efa92 100644 --- a/paddle/fluid/operators/controlflow/logical_op_npu.cc +++ b/paddle/fluid/operators/controlflow/logical_op_npu.cc @@ -15,8 +15,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class LogicalNotNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index 924ed1fcf7d35..62bcfb545e00f 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -29,8 +29,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - // Base convolution operator definations for other conv // like operators to reuse the implementation. 
inline int ConvOutputSize( diff --git a/paddle/fluid/operators/conv_op_mlu.cc b/paddle/fluid/operators/conv_op_mlu.cc index d0067d5c5930a..214af06bbd7c7 100644 --- a/paddle/fluid/operators/conv_op_mlu.cc +++ b/paddle/fluid/operators/conv_op_mlu.cc @@ -18,7 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template @@ -56,8 +55,8 @@ class MLUConvOpKernel : public framework::OpKernel { UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor input_tensor(input->type()); - Tensor output_tensor(output->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_tensor(output->type()); const std::vector perm_to_nhwc = {0, 2, 3, 1}; if (channel_last) { input_tensor.ShareDataWith(*input); @@ -78,7 +77,7 @@ class MLUConvOpKernel : public framework::OpKernel { output_tensor.set_layout(DataLayout::kNHWC); // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, @@ -166,8 +165,8 @@ class MLUConvGradOpKernel : public framework::OpKernel { UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor input_tensor(input->type()); - Tensor output_grad_tensor(output_grad->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_grad_tensor(output_grad->type()); const std::vector perm_to_nhwc = {0, 2, 3, 1}; const std::vector perm_to_nchw = {0, 3, 1, 2}; if (channel_last) { @@ -193,7 +192,7 @@ class MLUConvGradOpKernel : public framework::OpKernel { filter_grad->mutable_data(ctx.GetPlace()); auto filter_grad_dims = filter_grad->dims(); - Tensor temp_filter_grad(filter_grad->type()); + phi::DenseTensor temp_filter_grad(filter_grad->type()); temp_filter_grad.mutable_data({filter_grad_dims[0], filter_grad_dims[2], filter_grad_dims[3], @@ -234,7 +233,7 @@ class MLUConvGradOpKernel : public framework::OpKernel { if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); - Tensor input_grad_tensor(input_grad->type()); + phi::DenseTensor input_grad_tensor(input_grad->type()); if (channel_last) { input_grad_tensor.ShareDataWith(*input_grad); } else { @@ -248,7 +247,7 @@ class MLUConvGradOpKernel : public framework::OpKernel { input_grad_tensor.set_layout(DataLayout::kNHWC); // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, @@ -326,8 +325,8 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel { UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor input_tensor(input->type()); - Tensor output_tensor(output->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_tensor(output->type()); const std::vector perm_to_nhwc = {0, 2, 3, 1}; if (channel_last) { groups = in_dims[3]; @@ -350,7 +349,7 @@ class MLUDepthwiseConvOpKernel : public framework::OpKernel { output_tensor.set_layout(DataLayout::kNHWC); // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, @@ -438,8 +437,8 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor 
input_tensor(input->type()); - Tensor output_grad_tensor(output_grad->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_grad_tensor(output_grad->type()); const std::vector perm_to_nhwc = {0, 2, 3, 1}; const std::vector perm_to_nchw = {0, 3, 1, 2}; const std::vector perm_hwcm_to_mchw = {3, 2, 0, 1}; @@ -469,7 +468,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { filter_grad->mutable_data(ctx.GetPlace()); auto filter_grad_dims = filter_grad->dims(); - Tensor temp_filter_grad(filter_grad->type()); + phi::DenseTensor temp_filter_grad(filter_grad->type()); // Details about setting diff_w hwcn for better performance, see the CNNL // documentation. temp_filter_grad.mutable_data({filter_grad_dims[perm_mchw_to_hwcm[0]], @@ -512,7 +511,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); - Tensor input_grad_tensor(input_grad->type()); + phi::DenseTensor input_grad_tensor(input_grad->type()); if (channel_last) { input_grad_tensor.ShareDataWith(*input_grad); } else { @@ -526,7 +525,7 @@ class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { input_grad_tensor.set_layout(DataLayout::kNHWC); // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, diff --git a/paddle/fluid/operators/conv_op_npu.cc b/paddle/fluid/operators/conv_op_npu.cc index f4c7de95483b5..6b8f7118473a5 100644 --- a/paddle/fluid/operators/conv_op_npu.cc +++ b/paddle/fluid/operators/conv_op_npu.cc @@ -18,7 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; static void CastToFP16(const framework::ExecutionContext& ctx, const aclrtStream& stream, @@ -104,7 +103,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel { std::vector strides(4, 1); std::vector dilations(4, 1); - Tensor input_tensor, output_tensor; + phi::DenseTensor input_tensor, output_tensor; input_tensor.ShareDataWith(*input); output_tensor.ShareDataWith(*output); @@ -125,7 +124,7 @@ class DepthwiseConvNPUKernel : public framework::OpKernel { auto stream = ctx.template device_context().stream(); // Transform filter (n, 1, h, w) --> (1, n, h, w) - Tensor transformed_filter(filter->type()); + phi::DenseTensor transformed_filter(filter->type()); transformed_filter.mutable_data({filter->dims()[1], filter->dims()[0], filter->dims()[2], @@ -189,7 +188,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel { auto stream = ctx.template device_context().stream(); // Transform filter (n, 1, h, w) --> (1, n, h, w) - Tensor transformed_filter(filter->type()); + phi::DenseTensor transformed_filter(filter->type()); transformed_filter.mutable_data({filter->dims()[1], filter->dims()[0], filter->dims()[2], @@ -204,7 +203,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel { std::vector strides(4, 1); std::vector dilations(4, 1); - Tensor input_tensor, output_grad_tensor; + phi::DenseTensor input_tensor, output_grad_tensor; input_tensor.ShareDataWith(*input); output_grad_tensor.ShareDataWith(*output_grad); if (channel_last) { @@ -247,7 +246,7 @@ class DepthwiseConvGradNPUKernel : public framework::OpKernel { } if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); - Tensor input_grad_tensor; + phi::DenseTensor input_grad_tensor; input_grad_tensor.ShareDataWith(*input_grad); if (channel_last) { 
input_grad_tensor.set_layout(DataLayout::kNHWC); @@ -305,7 +304,7 @@ class NPUConvOpKernel : public framework::OpKernel { std::vector strides_vec(4, 1); std::vector dilations_vec(4, 1); - Tensor input_tensor, output_tensor; + phi::DenseTensor input_tensor, output_tensor; input_tensor.ShareDataWith(*input); output_tensor.ShareDataWith(*output); if (channel_last) { @@ -378,7 +377,7 @@ class NPUConvGradOpKernel : public framework::OpKernel { std::vector strides_vec(4, 1); std::vector dilations_vec(4, 1); - Tensor input_tensor, output_grad_tensor; + phi::DenseTensor input_tensor, output_grad_tensor; input_tensor.ShareDataWith(*input); output_grad_tensor.ShareDataWith(*output_grad); if (channel_last) { @@ -400,7 +399,7 @@ class NPUConvGradOpKernel : public framework::OpKernel { filter_grad->mutable_data(ctx.GetPlace()); std::vector filter_shape_vec = phi::vectorize(filter->dims()); - Tensor filter_grad_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor filter_grad_fp32(experimental::DataType::FLOAT32); filter_grad_fp32.Resize(filter_grad->dims()); if (framework::TransToProtoVarType(input->dtype()) == @@ -430,7 +429,7 @@ class NPUConvGradOpKernel : public framework::OpKernel { input_grad->mutable_data(ctx.GetPlace()); std::vector input_shape_vec = phi::vectorize(input->dims()); - Tensor input_grad_tensor; + phi::DenseTensor input_grad_tensor; input_grad_tensor.ShareDataWith(*input_grad); if (channel_last) { input_grad_tensor.set_layout(DataLayout::kNHWC); @@ -617,8 +616,9 @@ class NPUConv3dGradKernel : public framework::OpKernel { filter_grad->mutable_data(ctx.GetPlace()); std::vector filter_shape_vec = phi::vectorize(filter->dims()); - Tensor filter_grad_tensor = ctx.AllocateTmpTensor( - filter_grad->dims(), dev_ctx); + phi::DenseTensor filter_grad_tensor = + ctx.AllocateTmpTensor(filter_grad->dims(), + dev_ctx); filter_grad_tensor.ShareDataWith(*filter_grad); filter_grad_tensor.set_layout(DataLayout::kNCDHW); @@ -638,8 +638,9 @@ class NPUConv3dGradKernel : public framework::OpKernel { input_grad->mutable_data(ctx.GetPlace()); std::vector input_shape_vec = phi::vectorize(input->dims()); - Tensor input_grad_tensor = ctx.AllocateTmpTensor( - input_grad->dims(), dev_ctx); + phi::DenseTensor input_grad_tensor = + ctx.AllocateTmpTensor(input_grad->dims(), + dev_ctx); input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.set_layout(DataLayout::kNCDHW); diff --git a/paddle/fluid/operators/conv_transpose_op_mlu.cc b/paddle/fluid/operators/conv_transpose_op_mlu.cc index c2d68523d48cc..36d0be10575d1 100644 --- a/paddle/fluid/operators/conv_transpose_op_mlu.cc +++ b/paddle/fluid/operators/conv_transpose_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. 
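The conv3d gradient hunks above use the other allocation style touched by this change: a context-owned temporary from AllocateTmpTensor, which is likewise declared as phi::DenseTensor now that the alias is gone. A sketch with the template arguments reconstructed, assuming T, filter_grad, and dev_ctx are as in the kernel above:

    // Device temporary sized like filter_grad, owned by the execution context.
    phi::DenseTensor filter_grad_tensor =
        ctx.AllocateTmpTensor<T, platform::NPUDeviceContext>(
            filter_grad->dims(), dev_ctx);
    filter_grad_tensor.ShareDataWith(*filter_grad);
    filter_grad_tensor.set_layout(DataLayout::kNCDHW);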
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template @@ -61,8 +60,8 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel { phi::UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor input_tensor(input->type()); - Tensor output_tensor(output->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_tensor(output->type()); input_tensor.set_layout(DataLayout::kNHWC); output_tensor.set_layout(DataLayout::kNHWC); const std::vector perm_to_nhwc = {0, 2, 3, 1}; @@ -84,7 +83,7 @@ class Conv2DTransposeMLUKernel : public framework::OpKernel { } // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, @@ -168,8 +167,8 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel { phi::UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); - Tensor input_tensor(input->type()); - Tensor output_grad_tensor(output_grad->type()); + phi::DenseTensor input_tensor(input->type()); + phi::DenseTensor output_grad_tensor(output_grad->type()); output_grad_tensor.set_layout(DataLayout::kNHWC); const std::vector perm_to_nhwc = {0, 2, 3, 1}; @@ -191,7 +190,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel { } // transpose filter from MCHW to MHWC - Tensor trans_filter(filter->type()); + phi::DenseTensor trans_filter(filter->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, @@ -217,7 +216,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel { if (filter_grad) { filter_grad->mutable_data(ctx.GetPlace()); - Tensor filter_grad_tensor(filter_grad->type()); + phi::DenseTensor filter_grad_tensor(filter_grad->type()); // filter_grad always MCHW // filter_grad_tensor always MHWC auto filter_grad_dims = filter_grad->dims(); @@ -253,7 +252,7 @@ class Conv2DTransposeGradMLUKernel : public framework::OpKernel { if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); - Tensor input_grad_tensor(input_grad->type()); + phi::DenseTensor input_grad_tensor(input_grad->type()); input_tensor.set_layout(DataLayout::kNHWC); if (channel_last) { diff --git a/paddle/fluid/operators/conv_transpose_op_npu.cc b/paddle/fluid/operators/conv_transpose_op_npu.cc index 2f674de03f7a2..3723a4841af30 100644 --- a/paddle/fluid/operators/conv_transpose_op_npu.cc +++ b/paddle/fluid/operators/conv_transpose_op_npu.cc @@ -20,7 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -65,7 +64,7 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel { std::vector strides(4, 1); std::vector dilations(4, 1); - Tensor input_tensor, output_tensor; + phi::DenseTensor input_tensor, output_tensor; input_tensor.ShareDataWith(*input); output_tensor.ShareDataWith(*output); @@ -148,7 +147,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel { std::vector strides_vec(4, 1); std::vector dilations_vec(4, 1); - Tensor input_tensor, output_grad_tensor; + phi::DenseTensor input_tensor, output_grad_tensor; input_tensor.ShareDataWith(*input); output_grad_tensor.ShareDataWith(*output_grad); if (channel_last) { @@ -182,7 +181,7 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel { } if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); - Tensor input_grad_tensor; + phi::DenseTensor input_grad_tensor; input_grad_tensor.ShareDataWith(*input_grad); if (channel_last) { input_grad_tensor.set_layout(DataLayout::kNHWC); @@ -248,7 +247,7 @@ class Conv3DTransposeNPUKernel : public framework::OpKernel { std::vector strides(5, 1); std::vector dilations(5, 1); - Tensor input_tensor, output_tensor, filter_tensor; + phi::DenseTensor input_tensor, output_tensor, filter_tensor; input_tensor.Resize(input->dims()); input_tensor.ShareDataWith(*input); output_tensor.Resize(output->dims()); diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc index a36e9b73639ba..56f334b66571d 100644 --- a/paddle/fluid/operators/copy_cross_scope_op.cc +++ b/paddle/fluid/operators/copy_cross_scope_op.cc @@ -30,8 +30,6 @@ class OpBase; } // namespace imperative } // namespace paddle -using Tensor = phi::DenseTensor; - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index 5587b595cd470..2b3450d031607 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - inline std::vector CorrelationOutputSize(int batch, int input_height, int input_width, diff --git a/paddle/fluid/operators/cos_sim_op.h b/paddle/fluid/operators/cos_sim_op.h index e1935f0dae2ad..5d4f11a876585 100644 --- a/paddle/fluid/operators/cos_sim_op.h +++ b/paddle/fluid/operators/cos_sim_op.h @@ -21,13 +21,11 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CosSimKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - // get Tensor + // get phi::DenseTensor auto* in_x = context.Input("X"); auto* in_y = context.Input("Y"); auto* out_z = context.Output("Out"); @@ -74,7 +72,7 @@ template class CosSimGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - // get Tensor + // get phi::DenseTensor auto* in_x = context.Input("X"); auto* in_y = context.Input("Y"); auto* in_z = context.Input("Out"); diff --git a/paddle/fluid/operators/crop_op_npu.cc b/paddle/fluid/operators/crop_op_npu.cc index 8980e5f73dee7..916ad89f1e72c 100644 --- a/paddle/fluid/operators/crop_op_npu.cc +++ b/paddle/fluid/operators/crop_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CropNPUKernel : public framework::OpKernel { public: @@ -71,7 +69,7 @@ class CropNPUKernel : public framework::OpKernel { x->dims().size())); // shape memory maybe have gc. - Tensor tmp_shape(*shape); + phi::DenseTensor tmp_shape(*shape); tmp_shape.mutable_data(ctx.GetPlace()); const auto& runner = @@ -90,7 +88,7 @@ class CropNPUKernel : public framework::OpKernel { "(%d) of the Input(X).", shape_size.size(), x->dims().size())); - Tensor tmp_shape(x->dtype()); + phi::DenseTensor tmp_shape(x->dtype()); tmp_shape.Resize(phi::make_ddim(shape_size)); tmp_shape.mutable_data(ctx.GetPlace()); const auto& runner = diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 8ae6f448d24ba..c581d33091c02 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CrossEntropyOpKernel : public framework::OpKernel { public: @@ -36,8 +34,8 @@ class CrossEntropyOpKernel : public framework::OpKernel { int rank = x->dims().size(); auto label_dims = labels->dims(); - Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); - Tensor labels_2d, y_2d; + phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); + phi::DenseTensor labels_2d, y_2d; if (label_dims.size() < rank) { labels_2d.ShareDataWith(*labels); labels_2d.Resize({phi::product(label_dims), 1}); diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index 9279cf531d449..c3647d6e8c2d7 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -24,8 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CTCAlignKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index d436a4b5d531d..97e5eae62ab3b 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -26,8 +26,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template bool is_continuous(const Type &weight_list) { bool continuous = true; @@ -41,7 +39,7 @@ bool is_continuous(const Type &weight_list) { return continuous; } -int size_sum(const std::vector &weight_list) { +int size_sum(const std::vector &weight_list) { int size = 0; for (size_t i = 0; i < weight_list.size(); ++i) { auto in_size = weight_list[i]->numel(); @@ -53,8 +51,8 @@ int size_sum(const std::vector &weight_list) { template void weight_to_tensor(const platform::Place &place, gpuStream_t stream, - const std::vector &weight_list, - Tensor *weight) { + const std::vector &weight_list, + phi::DenseTensor *weight) { auto weight_data = weight->data(); int weight_offset = 0; for (size_t i = 0; i < weight_list.size(); ++i) { @@ -72,11 +70,12 @@ void weight_to_tensor(const platform::Place &place, } template -void weight_to_tensor_list(const platform::Place &place, - gpuStream_t stream, - std::vector *weight_grad, - const std::vector &weight_input, - const Tensor *weight) { +void weight_to_tensor_list( + const platform::Place &place, + gpuStream_t stream, + std::vector *weight_grad, + const std::vector &weight_input, + const phi::DenseTensor *weight) { int weight_offset = 0; auto *weight_data = weight->data(); for (size_t i = 0; i < weight_input.size(); ++i) { @@ -204,15 +203,15 @@ template class CudnnLSTMGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const Tensor *x = ctx.Input("Input"); - const Tensor *init_h = ctx.Input("InitH"); - const Tensor *init_c = ctx.Input("InitC"); + const phi::DenseTensor *x = ctx.Input("Input"); + const phi::DenseTensor *init_h = ctx.Input("InitH"); + const phi::DenseTensor *init_c = ctx.Input("InitC"); - Tensor *out = ctx.Output("Out"); - Tensor *last_h = ctx.Output("LastH"); - Tensor *last_c = ctx.Output("LastC"); - Tensor *reserve = ctx.Output("Reserve"); - Tensor *state_out = ctx.Output("StateOut"); + phi::DenseTensor *out = ctx.Output("Out"); + phi::DenseTensor *last_h = ctx.Output("LastH"); + phi::DenseTensor *last_c = ctx.Output("LastC"); + phi::DenseTensor *reserve = ctx.Output("Reserve"); + phi::DenseTensor *state_out = ctx.Output("StateOut"); const T *x_data = x->data(); const T *init_h_data = init_h->data(); @@ -256,7 +255,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { size_t workspace_size; size_t reserve_size; - Tensor weight_whole; + phi::DenseTensor weight_whole; T *w_data = nullptr; int weight_numel; bool w_initialized = false; @@ -272,7 +271,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { if (!w_initialized) { auto weight_list = ctx.MultiInput("WeightList"); bool continuous = - is_continuous>(weight_list); + is_continuous>(weight_list); weight_numel = size_sum(weight_list); if (!continuous) { @@ -288,7 +287,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { for (size_t i = 0; i < weight_list.size(); ++i) { size_t len = weight_list[i]->numel(); auto dim = weight_list[i]->dims(); - const_cast(weight_list[i]) + const_cast(weight_list[i]) ->ShareDataWith( weight_whole.Slice(static_cast(offset), static_cast(offset + len))) @@ -481,12 +480,12 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { auto place = ctx.GetPlace(); int weight_numel = size_sum(weight_list); bool continuous = - is_continuous>(weight_list); + is_continuous>(weight_list); auto stream = reinterpret_cast(ctx.device_context()) .stream(); - Tensor weight_whole; + phi::DenseTensor 
weight_whole; T *weight_data = nullptr; if (!continuous) { @@ -497,7 +496,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { weight_data = const_cast(weight_list[0]->data()); } - Tensor weight_grad; + phi::DenseTensor weight_grad; phi::funcs::SetConstant zero; weight_grad.mutable_data({weight_numel}, ctx.GetPlace()); zero(dev_ctx, &weight_grad, static_cast(0.0)); @@ -559,7 +558,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { SequenceLength, &workspace_size, &reserve_size, - const_cast(state_out)); + const_cast(state_out)); phi::DenseTensor workspace_data_; workspace_data_.mutable_data( diff --git a/paddle/fluid/operators/cumsum_op_mlu.cc b/paddle/fluid/operators/cumsum_op_mlu.cc index 83d9a10af1730..fb586b9585e03 100644 --- a/paddle/fluid/operators/cumsum_op_mlu.cc +++ b/paddle/fluid/operators/cumsum_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CumSumMLUKernel : public framework::OpKernel { public: @@ -34,7 +32,7 @@ class CumSumMLUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); phi::DenseTensor* input_ptr = const_cast(x); - Tensor flat_x(x->type()); + phi::DenseTensor flat_x(x->type()); if (flatten) { PADDLE_ENFORCE_EQ( axis, diff --git a/paddle/fluid/operators/cumsum_op_npu.cc b/paddle/fluid/operators/cumsum_op_npu.cc index 672a59cf22f59..7126e7ca4cbaf 100644 --- a/paddle/fluid/operators/cumsum_op_npu.cc +++ b/paddle/fluid/operators/cumsum_op_npu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static void CumsumImp(const phi::DenseTensor& input, phi::DenseTensor* output, const framework::NPUAttributeMap& attr_input, @@ -30,7 +28,7 @@ static void CumsumImp(const phi::DenseTensor& input, .stream(); if (framework::TransToProtoVarType(input.dtype()) == framework::proto::VarType::INT64) { - Tensor tmp_input; + phi::DenseTensor tmp_input; tmp_input.mutable_data(input.dims(), ctx.GetPlace()); auto dst_acl_dtype = ConvertToNpuDtype(framework::TransToProtoVarType(tmp_input.type())); @@ -41,7 +39,7 @@ static void CumsumImp(const phi::DenseTensor& input, {{"dst_type", static_cast(dst_acl_dtype)}}); cast_runner_1.Run(stream); - Tensor tmp_output; + phi::DenseTensor tmp_output; tmp_output.mutable_data(output->dims(), ctx.GetPlace()); const auto& runner = NpuOpRunner("CumsumD", {tmp_input}, {tmp_output}, attr_input); @@ -86,7 +84,7 @@ class CumSumNPUKernel : public framework::OpKernel { -1, axis)); - Tensor new_x(x->type()); + phi::DenseTensor new_x(x->type()); new_x.ShareDataWith(*x); new_x.Resize(phi::make_ddim({x->numel()})); diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc index 153b181b4fd6a..11af33df2f61b 100644 --- a/paddle/fluid/operators/cvm_op.cc +++ b/paddle/fluid/operators/cvm_op.cc @@ -21,8 +21,6 @@ limitations under the License. 
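In the cudnn_lstm hunks above the weight-list helpers spell out their element type as well: the container is a std::vector of const phi::DenseTensor pointers and the flat buffer is a phi::DenseTensor. A sketch of the size helper and its call site, with the template arguments (elided in the quoted hunks) reconstructed:

    // Total number of elements across all weight tensors.
    int size_sum(const std::vector<const phi::DenseTensor *> &weight_list) {
      int size = 0;
      for (size_t i = 0; i < weight_list.size(); ++i) {
        size += static_cast<int>(weight_list[i]->numel());
      }
      return size;
    }

    // Call site inside the kernel:
    //   auto weight_list = ctx.MultiInput<phi::DenseTensor>("WeightList");
    //   int weight_numel = size_sum(weight_list);
    //   phi::DenseTensor weight_whole;
    //   weight_whole.mutable_data<T>({weight_numel}, ctx.GetPlace());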
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class CVMOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index 5cac5392f4abb..400e025f82030 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -22,7 +22,6 @@ namespace paddle { namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; -using Tensor = phi::DenseTensor; template __global__ void CvmComputeKernel(const bool use_cvm, diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index 9bd5a00b3733f..461575d25b75d 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void CvmComputeKernel(const bool use_cvm, const int64_t item_width, diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index 36dc93445df59..6770a7e31c1a5 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template @@ -483,9 +482,9 @@ class DataNormGradOp : public framework::OperatorWithKernel { PADDLE_THROW(platform::errors::InvalidArgument( "Y@GRAD can not be found for computation")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } @@ -523,7 +522,7 @@ class DataNormGradKernel : public framework::OpKernel { (data_layout == DataLayout::kNCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); // init output - Tensor *d_x = nullptr; + phi::DenseTensor *d_x = nullptr; if (ctx.HasOutput(framework::GradVarName("X"))) { d_x = ctx.Output(framework::GradVarName("X")); } @@ -587,12 +586,12 @@ class DataNormGradKernel : public framework::OpKernel { EigenVectorArrayMap d_bias_arr(d_bias_data, C); EigenVectorArrayMap d_scale_arr(d_scale_data, C); - Tensor dy_sum; + phi::DenseTensor dy_sum; dy_sum.Resize({C}); dy_sum.mutable_data(ctx.GetPlace()); EigenVectorArrayMap dy_sum_arr( dy_sum.mutable_data(ctx.GetPlace()), C); - Tensor dy_mul_x_sub_mean_mul_invstd_sum; + phi::DenseTensor dy_mul_x_sub_mean_mul_invstd_sum; dy_mul_x_sub_mean_mul_invstd_sum.Resize({C}); dy_mul_x_sub_mean_mul_invstd_sum.mutable_data(ctx.GetPlace()); EigenVectorArrayMap dy_mul_x_sub_mean_mul_invstd_sum_arr( diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 1b895b0c8daa5..aaccaecc72067 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -26,7 +26,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; using phi::PADDLE_CUDA_NUM_THREADS; @@ -166,7 +165,7 @@ class DataNormGradKernel : public framework::OpKernel { const int C = x_dims[1]; // init output - Tensor *d_x = nullptr; + phi::DenseTensor *d_x = nullptr; if (ctx.HasOutput(framework::GradVarName("X"))) { d_x = ctx.Output(framework::GradVarName("X")); } diff --git a/paddle/fluid/operators/deformable_conv_op_mlu.cc b/paddle/fluid/operators/deformable_conv_op_mlu.cc index 08969ba98fcd2..f5814efb3f491 100644 --- a/paddle/fluid/operators/deformable_conv_op_mlu.cc +++ b/paddle/fluid/operators/deformable_conv_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class DeformableConvMLUKernel : public framework::OpKernel { public: @@ -58,29 +56,29 @@ class DeformableConvMLUKernel : public framework::OpKernel { im2col_step); const std::vector perm_to_nhwc = {0, 2, 3, 1}; - Tensor trans_input(input->dtype()); + phi::DenseTensor trans_input(input->dtype()); TransposeFromMLUTensor( ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - Tensor trans_offset(offset->dtype()); + phi::DenseTensor trans_offset(offset->dtype()); TransposeFromMLUTensor(ctx, perm_to_nhwc, offset, &trans_offset, true /*need_reshape_or_alloc*/); - Tensor trans_mask(mask->dtype()); + phi::DenseTensor trans_mask(mask->dtype()); TransposeFromMLUTensor( ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); - Tensor trans_filter(filter->dtype()); + phi::DenseTensor trans_filter(filter->dtype()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, &trans_filter, true /*need_reshape_or_alloc*/); - Tensor tmp_output(output->dtype()); + phi::DenseTensor tmp_output(output->dtype()); auto output_dims = output->dims(); tmp_output.mutable_data( {output_dims[0], output_dims[2], output_dims[3], output_dims[1]}, @@ -167,54 +165,54 @@ class DeformableConvGradMLUKernel : public framework::OpKernel { groups, im2col_step); - Tensor tmp_input_grad; + phi::DenseTensor tmp_input_grad; auto input_dims = input->dims(); tmp_input_grad.mutable_data( {input_dims[0], input_dims[2], input_dims[3], input_dims[1]}, ctx.GetPlace()); - Tensor tmp_filter_grad; + phi::DenseTensor tmp_filter_grad; auto filter_dims = filter->dims(); tmp_filter_grad.mutable_data( {filter_dims[0], filter_dims[2], filter_dims[3], filter_dims[1]}, ctx.GetPlace()); - Tensor tmp_offset_grad; + phi::DenseTensor tmp_offset_grad; auto offset_dims = offset->dims(); tmp_offset_grad.mutable_data( {offset_dims[0], offset_dims[2], offset_dims[3], offset_dims[1]}, ctx.GetPlace()); - Tensor tmp_mask_grad; + phi::DenseTensor tmp_mask_grad; auto mask_dims = mask->dims(); tmp_mask_grad.mutable_data( {mask_dims[0], mask_dims[2], mask_dims[3], mask_dims[1]}, ctx.GetPlace()); const std::vector perm_to_nhwc = {0, 2, 3, 1}; - Tensor trans_output_grad(output_grad->dtype()); + phi::DenseTensor trans_output_grad(output_grad->dtype()); TransposeFromMLUTensor(ctx, perm_to_nhwc, output_grad, &trans_output_grad, true /*need_reshape_or_alloc*/); - Tensor trans_input(input->dtype()); + phi::DenseTensor trans_input(input->dtype()); TransposeFromMLUTensor( ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - Tensor trans_offset(offset->dtype()); + phi::DenseTensor trans_offset(offset->dtype()); TransposeFromMLUTensor(ctx, perm_to_nhwc, offset, &trans_offset, true /*need_reshape_or_alloc*/); - Tensor 
trans_mask(mask->dtype()); + phi::DenseTensor trans_mask(mask->dtype()); TransposeFromMLUTensor( ctx, perm_to_nhwc, mask, &trans_mask, true /*need_reshape_or_alloc*/); - Tensor trans_filter(filter->dtype()); + phi::DenseTensor trans_filter(filter->dtype()); TransposeFromMLUTensor(ctx, perm_to_nhwc, filter, diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index 80d248b818b4f..0e8c736431b11 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -39,7 +39,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using phi::PADDLE_CUDA_NUM_THREADS; static inline int GET_BLOCKS(const int N) { diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 231d14e537b54..dabb69b5af8c1 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -33,8 +33,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template T bilinear_interp( const T* data, const T x, const T y, const int width, const int height) { @@ -518,7 +516,7 @@ class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { const int num_classes = no_trans ? 1 : channels_trans / 2; const int channels_each_class = no_trans ? output_dim : output_dim / num_classes; - Tensor roi_batch_id_list; + phi::DenseTensor roi_batch_id_list; roi_batch_id_list.Resize({num_rois}); int* roi_batch_id_data = roi_batch_id_list.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/bbox_util.cu.h b/paddle/fluid/operators/detection/bbox_util.cu.h index a9ad6cdfb659d..adb60a8a8d064 100644 --- a/paddle/fluid/operators/detection/bbox_util.cu.h +++ b/paddle/fluid/operators/detection/bbox_util.cu.h @@ -30,8 +30,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) int const kThreadsPerBlock = sizeof(uint64_t) * 8; @@ -47,11 +45,11 @@ struct RangeInitFunctor { template static void SortDescending(const phi::GPUContext &ctx, - const Tensor &value, - Tensor *value_out, - Tensor *index_out) { + const phi::DenseTensor &value, + phi::DenseTensor *value_out, + phi::DenseTensor *index_out) { int num = static_cast(value.numel()); - Tensor index_in_t; + phi::DenseTensor index_in_t; int *idx_in = index_in_t.mutable_data({num}, ctx.GetPlace()); platform::ForRange for_range(ctx, num); for_range(RangeInitFunctor{0, 1, idx_in}); @@ -287,10 +285,10 @@ static __global__ void NMSKernel(const int n_boxes, template static void NMS(const phi::GPUContext &ctx, - const Tensor &proposals, - const Tensor &sorted_indices, + const phi::DenseTensor &proposals, + const phi::DenseTensor &sorted_indices, const T nms_threshold, - Tensor *keep_out, + phi::DenseTensor *keep_out, bool pixel_offset = true) { int boxes_num = proposals.dims()[0]; const int col_blocks = DIVUP(boxes_num, kThreadsPerBlock); diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index 35368d0034221..583122b473d26 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class BipartiteMatchOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -234,7 +232,7 @@ class BipartiteMatchKernel : public framework::OpKernel { auto lod = dist_mat->lod().back(); for (size_t i = 0; i < lod.size() - 1; ++i) { if (lod[i + 1] > lod[i]) { - Tensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); + phi::DenseTensor one_ins = dist_mat->Slice(lod[i], lod[i + 1]); BipartiteMatch(one_ins, indices + i * col, dist + i * col); if (type == "per_prediction") { ArgMaxMatch(one_ins, indices + i * col, dist + i * col, threshold); diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 089f2f5569234..79f3b18b2dfce 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoDTenso = phi::DenseTensor; static constexpr int ImInfoSize = 3; diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index bb72ca194b54c..cb067f91662ed 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class BoxClipKernel : public framework::OpKernel { public: @@ -42,9 +40,10 @@ class BoxClipKernel : public framework::OpKernel { auto box_lod = input_box->lod().back(); int64_t n = static_cast(box_lod.size() - 1); for (int i = 0; i < n; ++i) { - Tensor im_info_slice = im_info->Slice(i, i + 1); - Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]); - Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]); + phi::DenseTensor output_slice = + output_box->Slice(box_lod[i], box_lod[i + 1]); ClipTiledBoxes(dev_ctx, im_info_slice, box_slice, &output_slice); } } diff --git a/paddle/fluid/operators/detection/box_coder_op_npu.cc b/paddle/fluid/operators/detection/box_coder_op_npu.cc index 865f218170f45..089f58558ae73 100644 --- a/paddle/fluid/operators/detection/box_coder_op_npu.cc +++ b/paddle/fluid/operators/detection/box_coder_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct BoxCoderFunction { public: @@ -28,31 +26,31 @@ struct BoxCoderFunction { stream = ctx.template device_context() .stream(); } - Tensor Adds(const phi::DenseTensor& x, float scalar) { - Tensor y; + phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) { + phi::DenseTensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}}); runner.Run(stream); return y; } - Tensor Muls(const phi::DenseTensor& x, float scalar) { - Tensor y; + phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) { + phi::DenseTensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}}); runner.Run(stream); return y; } - Tensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { - Tensor z; + phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor z; z.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {}); runner.Run(stream); return z; } - Tensor SubWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - Tensor z; + phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, + const framework::DDim& shape) { + phi::DenseTensor z; z.mutable_data(shape, place); const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {}); runner.Run(stream); @@ -66,10 +64,10 @@ struct BoxCoderFunction { const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor DivWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - Tensor z; + phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, + const framework::DDim& shape) { + phi::DenseTensor z; DivWithBroadCastVoid(x, y, shape, &z); return z; } @@ -81,10 +79,10 @@ struct BoxCoderFunction { const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor MulWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - Tensor z; + phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, + const framework::DDim& shape) { + phi::DenseTensor z; MulWithBroadCastVoid(x, y, shape, &z); return z; } @@ -96,36 +94,36 @@ struct BoxCoderFunction { const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor AddWithBroadCast(const phi::DenseTensor& x, - const phi::DenseTensor& y, - const framework::DDim& shape) { - Tensor z; + phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, + const framework::DDim& shape) { + phi::DenseTensor z; AddWithBroadCastVoid(x, y, shape, &z); return z; } - Tensor Abs(const phi::DenseTensor& x) { - Tensor y; + phi::DenseTensor Abs(const phi::DenseTensor& x) { + phi::DenseTensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Abs", {x}, {y}, {}); runner.Run(stream); return y; } - Tensor Log(const phi::DenseTensor& x) { - Tensor t_x_m1 = Adds(x, -1); - Tensor y; + phi::DenseTensor Log(const phi::DenseTensor& x) { + phi::DenseTensor t_x_m1 = Adds(x, -1); + phi::DenseTensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {}); runner.Run(stream); return y; } - Tensor Exp(const phi::DenseTensor& x) { - Tensor y; + phi::DenseTensor Exp(const phi::DenseTensor& x) { + 
phi::DenseTensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Exp", {x}, {y}, {}); runner.Run(stream); return y; } - Tensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) { auto dim_x = x.dims(); auto dim_y = y.dims(); PADDLE_ENFORCE_EQ( @@ -145,7 +143,7 @@ struct BoxCoderFunction { "got dim_x[1] = %d, dim_y[0] = %d.", dim_x[1], dim_y[0])); - Tensor z; + phi::DenseTensor z; z.mutable_data({dim_x[0], dim_y[1]}, place); const auto& runner = NpuOpRunner("MatMul", @@ -155,7 +153,7 @@ struct BoxCoderFunction { runner.Run(stream); return z; } - void ConcatVoid(const std::vector& inputs, + void ConcatVoid(const std::vector& inputs, const framework::DDim& shape_out, int axis, phi::DenseTensor* output) { @@ -172,18 +170,18 @@ struct BoxCoderFunction { runner.AddInputNames(names); runner.Run(stream); } - Tensor Concat(const std::vector& inputs, - const framework::DDim& shape_out, - int axis) { - Tensor output; + phi::DenseTensor Concat(const std::vector& inputs, + const framework::DDim& shape_out, + int axis) { + phi::DenseTensor output; ConcatVoid(inputs, shape_out, axis, &output); return output; } - Tensor Slice(const phi::DenseTensor& x, - const std::vector& offsets, - const std::vector& size, - const framework::DDim& shape) { - Tensor y; + phi::DenseTensor Slice(const phi::DenseTensor& x, + const std::vector& offsets, + const std::vector& size, + const framework::DDim& shape) { + phi::DenseTensor y; y.mutable_data(shape, place); const auto& runner = NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}}); @@ -218,8 +216,8 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, auto M = pb->dims()[0]; auto N = tb->dims()[0]; auto shape_0 = phi::make_ddim({4, 2}); - Tensor m_diff; - Tensor m_aver; + phi::DenseTensor m_diff; + phi::DenseTensor m_aver; std::vector vec_diff = {static_cast(-1), static_cast(0), static_cast(0), @@ -240,10 +238,10 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, Vector2Tensor(ctx, vec_aver, shape_0, &m_aver); BoxCoderFunction F(ctx); - Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); - Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); - Tensor tb_xy = F.Dot(*tb, m_aver); - Tensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1)); + phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); + phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); + phi::DenseTensor tb_xy = F.Dot(*tb, m_aver); + phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 
0 : 1)); pb_xy.Resize({1, M, 2}); pb_wh.Resize({1, M, 2}); @@ -253,15 +251,16 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, auto shape_half = phi::make_ddim({N, M, 2}); auto shape_full = phi::make_ddim({N, M, 4}); - Tensor out_xy_0 = F.DivWithBroadCast( + phi::DenseTensor out_xy_0 = F.DivWithBroadCast( F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half); - Tensor out_wh_0 = F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half))); - Tensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2); + phi::DenseTensor out_wh_0 = + F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half))); + phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2); if (pbv) { F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out); } else { - Tensor t_var; + phi::DenseTensor t_var; std::vector vec_var(4); for (auto i = 0; i < 4; i++) { vec_var[i] = static_cast(variance[i]); @@ -281,8 +280,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, int axis, phi::DenseTensor* out) { auto shape_0 = phi::make_ddim({4, 2}); - Tensor m_diff; - Tensor m_aver; + phi::DenseTensor m_diff; + phi::DenseTensor m_aver; std::vector vec_diff = {static_cast(-1), static_cast(0), static_cast(0), @@ -303,8 +302,8 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, Vector2Tensor(ctx, vec_aver, shape_0, &m_aver); BoxCoderFunction F(ctx); - Tensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); - Tensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); + phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5)); + phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1)); auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2}) : phi::make_ddim({pb->dims()[0], 1, 2}); pb_xy.Resize(pb_resize_shape); @@ -313,18 +312,22 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2}); std::vector tbox_slice_size = { static_cast(tb->dims()[0]), static_cast(tb->dims()[1]), 2}; - Tensor tbox01 = F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape); - Tensor tbox23 = F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape); + phi::DenseTensor tbox01 = + F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape); + phi::DenseTensor tbox23 = + F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape); - Tensor tb_xy; - Tensor tb_wh; + phi::DenseTensor tb_xy; + phi::DenseTensor tb_wh; if (pbv) { auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2}); auto pbvt_resize_shape = axis == 0 ? 
phi::make_ddim({1, pbv->dims()[0], 2}) : phi::make_ddim({pbv->dims()[0], 1, 2}); std::vector pbvt_slice_size = {static_cast(pbv->dims()[0]), 2}; - Tensor pbv_t01 = F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape); - Tensor pbv_t23 = F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape); + phi::DenseTensor pbv_t01 = + F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape); + phi::DenseTensor pbv_t23 = + F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape); pbv_t01.Resize(pbvt_resize_shape); pbv_t23.Resize(pbvt_resize_shape); @@ -345,7 +348,7 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, &tb_xy); F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh); } else { - Tensor t_var01, t_var23; + phi::DenseTensor t_var01, t_var23; auto t_var_shape = phi::make_ddim({1, 1, 2}); std::vector vec_var01 = {static_cast(variance[0]), static_cast(variance[1])}; @@ -366,9 +369,9 @@ void BoxCoderDec(const framework::ExecutionContext& ctx, tbox_slice_shape, &tb_wh); } - Tensor obox01 = + phi::DenseTensor obox01 = F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape); - Tensor obox23 = + phi::DenseTensor obox23 = F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape), (norm ? 0 : -1)); F.ConcatVoid({obox01, obox23}, out->dims(), 2, out); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc index 37dc10df7292a..e07e4034f330f 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc @@ -16,7 +16,6 @@ limitations under the License.*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class CollectFpnProposalsOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index b517f2ec1fdd3..29cf8da067f84 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -33,8 +33,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static constexpr int kNumCUDAThreads = 64; static constexpr int kNumMaxinumNumBlocks = 4096; @@ -74,13 +72,13 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { int real_post_num = min(post_nms_topN, total_roi_num); fpn_rois->mutable_data({real_post_num, kBBoxSize}, dev_ctx.GetPlace()); - Tensor concat_rois; - Tensor concat_scores; + phi::DenseTensor concat_rois; + phi::DenseTensor concat_scores; T* concat_rois_data = concat_rois.mutable_data( {total_roi_num, kBBoxSize}, dev_ctx.GetPlace()); T* concat_scores_data = concat_scores.mutable_data({total_roi_num, 1}, dev_ctx.GetPlace()); - Tensor roi_batch_id_list; + phi::DenseTensor roi_batch_id_list; roi_batch_id_list.Resize({total_roi_num}); int* roi_batch_id_data = roi_batch_id_list.mutable_data(platform::CPUPlace()); @@ -130,20 +128,20 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { } // copy batch id list to GPU - Tensor roi_batch_id_list_gpu; + phi::DenseTensor roi_batch_id_list_gpu; framework::TensorCopy( roi_batch_id_list, dev_ctx.GetPlace(), &roi_batch_id_list_gpu); - Tensor index_in_t; + phi::DenseTensor index_in_t; int* idx_in = index_in_t.mutable_data({total_roi_num}, dev_ctx.GetPlace()); platform::ForRange 
for_range_total(dev_ctx, total_roi_num); for_range_total(RangeInitFunctor{0, 1, idx_in}); - Tensor keys_out_t; + phi::DenseTensor keys_out_t; T* keys_out = keys_out_t.mutable_data({total_roi_num}, dev_ctx.GetPlace()); - Tensor index_out_t; + phi::DenseTensor index_out_t; int* idx_out = index_out_t.mutable_data({total_roi_num}, dev_ctx.GetPlace()); @@ -175,21 +173,21 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { sizeof(T) * 8, dev_ctx.stream()); index_out_t.Resize({real_post_num}); - Tensor sorted_rois; + phi::DenseTensor sorted_rois; sorted_rois.mutable_data({real_post_num, kBBoxSize}, dev_ctx.GetPlace()); - Tensor sorted_batch_id; + phi::DenseTensor sorted_batch_id; sorted_batch_id.mutable_data({real_post_num}, dev_ctx.GetPlace()); phi::funcs::GPUGather(dev_ctx, concat_rois, index_out_t, &sorted_rois); phi::funcs::GPUGather( dev_ctx, roi_batch_id_list_gpu, index_out_t, &sorted_batch_id); - Tensor batch_index_t; + phi::DenseTensor batch_index_t; int* batch_idx_in = batch_index_t.mutable_data({real_post_num}, dev_ctx.GetPlace()); platform::ForRange for_range_post(dev_ctx, real_post_num); for_range_post(RangeInitFunctor{0, 1, batch_idx_in}); - Tensor out_id_t; + phi::DenseTensor out_id_t; int* out_id_data = out_id_t.mutable_data({real_post_num}, dev_ctx.GetPlace()); // Determine temporary device storage requirements @@ -222,7 +220,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { phi::funcs::GPUGather(dev_ctx, sorted_rois, index_out_t, fpn_rois); - Tensor length_lod; + phi::DenseTensor length_lod; int* length_lod_data = length_lod.mutable_data({lod_size}, dev_ctx.GetPlace()); phi::funcs::SetConstant set_zero; diff --git a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc index a6f9170712d96..d1a609ad45de6 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc @@ -15,7 +15,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using fp16 = paddle::platform::float16; template @@ -89,7 +88,7 @@ struct DensityPriorBoxFunction { const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Concat(const std::vector& inputs, + void Concat(const std::vector& inputs, int axis, phi::DenseTensor* output) { // output should be init first @@ -131,14 +130,14 @@ struct DensityPriorBoxFunction { platform::Place place; aclrtStream stream; const framework::ExecutionContext& ctx; - Tensor t0; - Tensor t1; - Tensor tn; + phi::DenseTensor t0; + phi::DenseTensor t1; + phi::DenseTensor tn; }; template <> void DensityPriorBoxFunction::Arange(int n, phi::DenseTensor* x) { - Tensor x_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); @@ -149,7 +148,7 @@ void DensityPriorBoxFunction::Arange(int n, phi::DenseTensor* x) { template <> void DensityPriorBoxFunction::FloatVec2Tsr(const std::vector& vec, phi::DenseTensor* tsr_dst) { - Tensor tsr_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor tsr_fp32(experimental::DataType::FLOAT32); tsr_fp32.mutable_data(tsr_dst->dims(), place); framework::TensorFromVector(vec, ctx.device_context(), &tsr_fp32); ctx.template device_context().Wait(); @@ -185,9 +184,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); DensityPriorBoxFunction F(ctx); - Tensor h(_type); + phi::DenseTensor h(_type); h.mutable_data({layer_h}, place); - Tensor w(_type); + phi::DenseTensor w(_type); w.mutable_data({layer_w}, place); F.Arange(layer_h, &h); F.Arange(layer_w, &w); @@ -203,11 +202,11 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { for (size_t i = 0; i < densities.size(); ++i) { num_priors_per_ratio += densities[i] * densities[i]; } - Tensor di(_type); - Tensor dj(_type); - Tensor shifts(_type); - Tensor box_w_ratio(_type); - Tensor box_h_ratio(_type); + phi::DenseTensor di(_type); + phi::DenseTensor dj(_type); + phi::DenseTensor shifts(_type); + phi::DenseTensor box_w_ratio(_type); + phi::DenseTensor box_h_ratio(_type); di.mutable_data({ratios_size * num_priors_per_ratio}, place); dj.mutable_data({ratios_size * num_priors_per_ratio}, place); shifts.mutable_data({ratios_size * num_priors_per_ratio}, place); @@ -220,19 +219,21 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { // Range = start:start+ratios_size*density_sqr, density = densities[i] int density_sqr = densities[i] * densities[i]; // shifts[Range] = [step_average/density]*ratios_size*density_sqr - Tensor shifts_part = + phi::DenseTensor shifts_part = shifts.Slice(start, start + ratios_size * density_sqr); FillNpuTensorWithConstant(&shifts_part, static_cast(step_average / densities[i])); // di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size // dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size - Tensor di_part = di.Slice(start, start + ratios_size * density_sqr); - Tensor dj_part = dj.Slice(start, start + ratios_size * density_sqr); + phi::DenseTensor di_part = + di.Slice(start, start + ratios_size * density_sqr); + phi::DenseTensor dj_part = + dj.Slice(start, start + ratios_size * density_sqr); if (densities[i] > 1) { di_part.Resize({ratios_size, densities[i], densities[i]}); dj_part.Resize({ratios_size, densities[i], 
densities[i]}); - Tensor range_n(_type); + phi::DenseTensor range_n(_type); range_n.mutable_data({densities[i]}, place); F.Arange(densities[i], &range_n); range_n.Resize({1, densities[i], 1}); @@ -254,9 +255,9 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { // Range_mini = start_box_ratio:start_box_ratio+density_sqr // box_h_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr // box_w_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr - Tensor box_h_ratio_part = + phi::DenseTensor box_h_ratio_part = box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); - Tensor box_w_ratio_part = + phi::DenseTensor box_w_ratio_part = box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr); FillNpuTensorWithConstant(&box_w_ratio_part, static_cast(fixed_sizes[i] * sqrt(ar))); @@ -274,8 +275,8 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { // c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts // c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts - Tensor c_x(_type); - Tensor c_y(_type); + phi::DenseTensor c_x(_type); + phi::DenseTensor c_y(_type); auto dim0 = phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1}); auto dim1 = @@ -301,17 +302,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { F.Muls(&box_w_ratio, static_cast(0.5), &box_w_ratio); F.Muls(&box_h_ratio, static_cast(0.5), &box_h_ratio); - Tensor zero_t(_type); - Tensor one_t(_type); + phi::DenseTensor zero_t(_type); + phi::DenseTensor one_t(_type); zero_t.mutable_data({1}, place); one_t.mutable_data({1}, place); FillNpuTensorWithConstant(&zero_t, static_cast(0)); FillNpuTensorWithConstant(&one_t, static_cast(1)); - Tensor outbox0(_type); - Tensor outbox1(_type); - Tensor outbox2(_type); - Tensor outbox3(_type); + phi::DenseTensor outbox0(_type); + phi::DenseTensor outbox1(_type); + phi::DenseTensor outbox2(_type); + phi::DenseTensor outbox3(_type); outbox0.mutable_data(dim0, place); outbox1.mutable_data(dim1, place); outbox2.mutable_data(dim0, place); @@ -349,17 +350,17 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { {layer_h, layer_w, ratios_size * num_priors_per_ratio, 4}); boxes->mutable_data(place); vars->mutable_data(place); - Tensor boxes_share(_type); - Tensor vars_share(_type); + phi::DenseTensor boxes_share(_type); + phi::DenseTensor vars_share(_type); boxes_share.ShareDataWith(*boxes); boxes_share.Resize(out_dim); vars_share.ShareDataWith(*vars); vars_share.Resize(out_dim); - Tensor box0(_type); - Tensor box1(_type); - Tensor box2(_type); - Tensor box3(_type); + phi::DenseTensor box0(_type); + phi::DenseTensor box1(_type); + phi::DenseTensor box2(_type); + phi::DenseTensor box3(_type); // out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1} out_dim[3] = 1; box0.mutable_data(out_dim, place); @@ -377,7 +378,7 @@ class DensityPriorBoxOpNPUKernel : public framework::OpKernel { std::vector multiples = { layer_h, layer_w, ratios_size * num_priors_per_ratio, 1}; - Tensor variances_t(_type); + phi::DenseTensor variances_t(_type); // variances.size() == 4 variances_t.mutable_data({4}, place); F.FloatVec2Tsr(variances, &variances_t); diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index f14768168a425..7ae5ba6ca8f9c 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -25,7 +25,6 @@ limitations 
under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; const int kBoxDim = 4; template @@ -151,16 +150,17 @@ static inline void ExpandMaskTarget(const phi::CPUContext& ctx, } template -std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, - const phi::DenseTensor& im_info, - const phi::DenseTensor& gt_classes, - const phi::DenseTensor& is_crowd, - const phi::DenseTensor& gt_segms, - const phi::DenseTensor& rois, - const phi::DenseTensor& label_int32, - const int num_classes, - const int resolution, - const framework::LoD& segm_length) { +std::vector SampleMaskForOneImage( + const phi::CPUContext& ctx, + const phi::DenseTensor& im_info, + const phi::DenseTensor& gt_classes, + const phi::DenseTensor& is_crowd, + const phi::DenseTensor& gt_segms, + const phi::DenseTensor& rois, + const phi::DenseTensor& label_int32, + const int num_classes, + const int resolution, + const framework::LoD& segm_length) { // Prepare the mask targets by associating one gt mask to each training roi // that has a fg (non-bg) class label. const int64_t gt_size = static_cast(gt_classes.dims()[0]); @@ -218,15 +218,15 @@ std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, int gt_num = mask_gt_inds.size(); int fg_num = fg_inds.size(); - Tensor boxes_from_polys; + phi::DenseTensor boxes_from_polys; boxes_from_polys.mutable_data({gt_num, 4}, platform::CPUPlace()); Poly2Boxes(gt_polys, boxes_from_polys.data()); std::vector roi_has_mask = std::vector(fg_inds.begin(), fg_inds.end()); - Tensor mask_class_labels; - Tensor masks; - Tensor rois_fg; + phi::DenseTensor mask_class_labels; + phi::DenseTensor masks; + phi::DenseTensor rois_fg; auto im_scale = im_info.data()[2]; if (fg_num > 0) { @@ -251,7 +251,7 @@ std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, rois_fg_data[k] = rois_fg_data[k] / im_scale; } - Tensor overlaps_bbfg_bbpolys; + phi::DenseTensor overlaps_bbfg_bbpolys; overlaps_bbfg_bbpolys.mutable_data({fg_num, gt_num}, ctx.GetPlace()); BboxOverlaps(rois_fg, boxes_from_polys, &overlaps_bbfg_bbpolys); @@ -306,7 +306,7 @@ std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, roi_has_mask = std::vector(bg_inds.begin(), bg_inds.end()); } - Tensor masks_expand; + phi::DenseTensor masks_expand; ExpandMaskTarget( ctx, masks, mask_class_labels, resolution, num_classes, &masks_expand); @@ -315,13 +315,13 @@ std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, rois_fg_data[k] = rois_fg_data[k] * im_scale; } - Tensor roi_has_mask_t; + phi::DenseTensor roi_has_mask_t; int roi_has_mask_size = roi_has_mask.size(); int* roi_has_mask_data = roi_has_mask_t.mutable_data({roi_has_mask_size, 1}, ctx.GetPlace()); std::copy(roi_has_mask.begin(), roi_has_mask.end(), roi_has_mask_data); - std::vector res; + std::vector res; res.emplace_back(rois_fg); res.emplace_back(roi_has_mask_t); res.emplace_back(masks_expand); @@ -405,23 +405,23 @@ class GenerateMaskLabelsKernel : public framework::OpKernel { lod0.emplace_back(num_mask); continue; } - Tensor im_info_slice = im_info->Slice(i, i + 1); - Tensor gt_classes_slice = + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor gt_classes_slice = gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); - Tensor is_crowd_slice = + phi::DenseTensor is_crowd_slice = is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); - Tensor label_int32_slice = + phi::DenseTensor label_int32_slice = label_int32->Slice(label_int32_lod[i], label_int32_lod[i + 1]); - Tensor rois_slice = 
rois->Slice(rois_lod[i], rois_lod[i + 1]); + phi::DenseTensor rois_slice = rois->Slice(rois_lod[i], rois_lod[i + 1]); auto sub_lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(gt_segms_lod, i, i + 1, 0); auto lod_length = sub_lod_and_offset.first; size_t s = sub_lod_and_offset.second.first; size_t e = sub_lod_and_offset.second.second; - Tensor gt_segms_slice = gt_segms->Slice(s, e); + phi::DenseTensor gt_segms_slice = gt_segms->Slice(s, e); - std::vector tensor_output = + std::vector tensor_output = SampleMaskForOneImage(dev_ctx, im_info_slice, gt_classes_slice, @@ -433,9 +433,9 @@ class GenerateMaskLabelsKernel : public framework::OpKernel { resolution, lod_length); - Tensor sampled_mask_rois = tensor_output[0]; - Tensor sampled_roi_has_mask_int32 = tensor_output[1]; - Tensor sampled_mask_int32 = tensor_output[2]; + phi::DenseTensor sampled_mask_rois = tensor_output[0]; + phi::DenseTensor sampled_roi_has_mask_int32 = tensor_output[1]; + phi::DenseTensor sampled_mask_int32 = tensor_output[2]; AppendMask(mask_rois, kBoxDim * num_mask, &sampled_mask_rois); AppendMask( diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index 1071641b6bc60..b11030f1d086a 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -25,7 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; const int kBoxDim = 4; template @@ -174,7 +173,7 @@ void Concat(const phi::CPUContext& context, const phi::DenseTensor& in_tensor_b, phi::DenseTensor* out_tensor) { int axis = 0; - std::vector inputs; + std::vector inputs; inputs.emplace_back(in_tensor_a); inputs.emplace_back(in_tensor_b); math::ConcatFunctor concat_functor; @@ -300,7 +299,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, phi::DenseTensor* sampled_max_overlap) { int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); - Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; + phi::DenseTensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; int* fg_inds_data = fg_inds_t.mutable_data({fg_num}, context.GetPlace()); int* bg_inds_data = bg_inds_t.mutable_data({bg_num}, context.GetPlace()); int* gt_box_inds_data = @@ -312,7 +311,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, std::copy(gt_inds.begin(), gt_inds.end(), gt_box_inds_data); std::copy(gt_inds.begin(), gt_inds.end(), gt_label_inds_data); - Tensor fg_boxes, bg_boxes, fg_labels, bg_labels; + phi::DenseTensor fg_boxes, bg_boxes, fg_labels, bg_labels; fg_boxes.mutable_data({fg_num, kBoxDim}, context.GetPlace()); phi::funcs::CPUGather(context, boxes, fg_inds_t, &fg_boxes); bg_boxes.mutable_data({bg_num, kBoxDim}, context.GetPlace()); @@ -325,7 +324,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, phi::funcs::set_constant(context, &bg_labels, 0); Concat(context, fg_labels, bg_labels, sampled_labels); - Tensor fg_max_overlap, bg_max_overlap; + phi::DenseTensor fg_max_overlap, bg_max_overlap; fg_max_overlap.mutable_data({fg_num}, context.GetPlace()); phi::funcs::CPUGather(context, max_overlap, fg_inds_t, &fg_max_overlap); bg_max_overlap.mutable_data({bg_num}, context.GetPlace()); @@ -334,7 +333,7 @@ void GatherBoxesLabels(const phi::CPUContext& context, } template -std::vector SampleRoisForOneImage( +std::vector SampleRoisForOneImage( const phi::CPUContext& context, const phi::DenseTensor& rpn_rois_in, const 
phi::DenseTensor& gt_classes, @@ -355,7 +354,7 @@ std::vector SampleRoisForOneImage( const phi::DenseTensor& max_overlap) { // 1.1 map to original image auto im_scale = im_info.data()[2]; - Tensor rpn_rois; + phi::DenseTensor rpn_rois; rpn_rois.mutable_data(rpn_rois_in.dims(), context.GetPlace()); const T* rpn_rois_in_dt = rpn_rois_in.data(); T* rpn_rois_dt = rpn_rois.data(); @@ -367,10 +366,10 @@ std::vector SampleRoisForOneImage( int proposals_num = 1; if (is_cascade_rcnn) { - Tensor keep; + phi::DenseTensor keep; FilterRoIs(context, rpn_rois, max_overlap, &keep); - Tensor roi_filter; - // Tensor box_filter; + phi::DenseTensor roi_filter; + // phi::DenseTensor box_filter; if (keep.numel() == 0) { phi::funcs::SetConstant set_zero; roi_filter.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); @@ -389,16 +388,16 @@ std::vector SampleRoisForOneImage( // 1.2 compute overlaps proposals_num += gt_boxes.dims()[0]; - Tensor proposal_to_gt_overlaps; + phi::DenseTensor proposal_to_gt_overlaps; proposal_to_gt_overlaps.mutable_data({proposals_num, gt_boxes.dims()[0]}, context.GetPlace()); - Tensor boxes; + phi::DenseTensor boxes; boxes.mutable_data({proposals_num, kBoxDim}, context.GetPlace()); Concat(context, gt_boxes, rpn_rois, &boxes); BboxOverlaps(boxes, gt_boxes, &proposal_to_gt_overlaps); - Tensor proposal_with_max_overlap; + phi::DenseTensor proposal_with_max_overlap; proposal_with_max_overlap.mutable_data({proposals_num}, context.GetPlace()); @@ -423,7 +422,8 @@ std::vector SampleRoisForOneImage( std::vector mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels // Gather boxes and labels - Tensor sampled_boxes, sampled_labels, sampled_gts, sampled_max_overlap; + phi::DenseTensor sampled_boxes, sampled_labels, sampled_gts, + sampled_max_overlap; int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); int boxes_num = fg_num + bg_num; @@ -446,7 +446,7 @@ std::vector SampleRoisForOneImage( &sampled_max_overlap); // Compute targets - Tensor bbox_targets_single; + phi::DenseTensor bbox_targets_single; bbox_targets_single.mutable_data(bbox_dim, context.GetPlace()); BoxToDelta(fg_num, sampled_boxes, @@ -456,14 +456,14 @@ std::vector SampleRoisForOneImage( &bbox_targets_single); // Scale rois - Tensor sampled_rois; + phi::DenseTensor sampled_rois; sampled_rois.mutable_data(sampled_boxes.dims(), context.GetPlace()); auto sampled_rois_et = framework::EigenTensor::From(sampled_rois); auto sampled_boxes_et = framework::EigenTensor::From(sampled_boxes); sampled_rois_et = sampled_boxes_et * im_scale; // Expand box targets - Tensor bbox_targets, bbox_inside_weights, bbox_outside_weights; + phi::DenseTensor bbox_targets, bbox_inside_weights, bbox_outside_weights; framework::DDim bbox_expand_dim({boxes_num, kBoxDim * class_nums}); bbox_targets.mutable_data(bbox_expand_dim, context.GetPlace()); bbox_inside_weights.mutable_data(bbox_expand_dim, context.GetPlace()); @@ -500,7 +500,7 @@ std::vector SampleRoisForOneImage( bbox_outside_weights_data[dst_idx + 3] = 1; } } - std::vector res; + std::vector res; res.emplace_back(sampled_rois); res.emplace_back(sampled_labels); res.emplace_back(bbox_targets); @@ -610,16 +610,16 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { lod0.emplace_back(num_rois); continue; } - Tensor rpn_rois_slice = + phi::DenseTensor rpn_rois_slice = rpn_rois->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); - Tensor gt_classes_slice = + phi::DenseTensor gt_classes_slice = gt_classes->Slice(gt_classes_lod[i], gt_classes_lod[i + 1]); - Tensor is_crowd_slice = + 
phi::DenseTensor is_crowd_slice = is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); - Tensor gt_boxes_slice = + phi::DenseTensor gt_boxes_slice = gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); - Tensor im_info_slice = im_info->Slice(i, i + 1); - Tensor max_overlap_slice; + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor max_overlap_slice; if (is_cascade_rcnn) { auto* max_overlap = context.Input("MaxOverlap"); max_overlap_slice = @@ -628,7 +628,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { max_overlap_slice.mutable_data({rpn_rois_slice.dims()[0]}, context.GetPlace()); } - std::vector tensor_output = + std::vector tensor_output = SampleRoisForOneImage(dev_ctx, rpn_rois_slice, gt_classes_slice, @@ -647,12 +647,12 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { is_cascade_rcnn, is_cls_agnostic, max_overlap_slice); - Tensor sampled_rois = tensor_output[0]; - Tensor sampled_labels_int32 = tensor_output[1]; - Tensor sampled_bbox_targets = tensor_output[2]; - Tensor sampled_bbox_inside_weights = tensor_output[3]; - Tensor sampled_bbox_outside_weights = tensor_output[4]; - Tensor sampled_max_overlap = tensor_output[5]; + phi::DenseTensor sampled_rois = tensor_output[0]; + phi::DenseTensor sampled_labels_int32 = tensor_output[1]; + phi::DenseTensor sampled_bbox_targets = tensor_output[2]; + phi::DenseTensor sampled_bbox_inside_weights = tensor_output[3]; + phi::DenseTensor sampled_bbox_outside_weights = tensor_output[4]; + phi::DenseTensor sampled_max_overlap = tensor_output[5]; AppendRois(rois, kBoxDim * num_rois, &sampled_rois); AppendRois(labels_int32, num_rois, &sampled_labels_int32); diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index 6491c8b8fcece..030b99cd1dbd7 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -27,8 +27,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class GenerateProposalsOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -115,7 +113,7 @@ class GenerateProposalsKernel : public framework::OpKernel { context.GetPlace()); rpn_roi_probs->mutable_data({scores->numel(), 1}, context.GetPlace()); - Tensor bbox_deltas_swap, scores_swap; + phi::DenseTensor bbox_deltas_swap, scores_swap; bbox_deltas_swap.mutable_data({num, h_bbox, w_bbox, c_bbox}, dev_ctx.GetPlace()); scores_swap.mutable_data({num, h_score, w_score, c_score}, @@ -136,14 +134,14 @@ class GenerateProposalsKernel : public framework::OpKernel { int64_t num_proposals = 0; for (int64_t i = 0; i < num; ++i) { - Tensor im_info_slice = im_info->Slice(i, i + 1); - Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); - Tensor scores_slice = scores_swap.Slice(i, i + 1); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); + phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1); bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4}); scores_slice.Resize({h_score * w_score * c_score, 1}); - std::pair tensor_pair = + std::pair tensor_pair = ProposalForOneImage(dev_ctx, im_info_slice, anchors, @@ -155,8 +153,8 @@ class GenerateProposalsKernel : public framework::OpKernel { nms_thresh, min_size, eta); - Tensor &proposals = tensor_pair.first; - Tensor &scores = tensor_pair.second; + phi::DenseTensor &proposals = tensor_pair.first; + phi::DenseTensor &scores = tensor_pair.second; AppendProposals(rpn_rois, 4 * num_proposals, proposals); AppendProposals(rpn_roi_probs, num_proposals, scores); @@ -179,13 +177,13 @@ class GenerateProposalsKernel : public framework::OpKernel { rpn_roi_probs->Resize({num_proposals, 1}); } - std::pair ProposalForOneImage( + std::pair ProposalForOneImage( const phi::CPUContext &ctx, - const Tensor &im_info_slice, - const Tensor &anchors, - const Tensor &variances, - const Tensor &bbox_deltas_slice, // [M, 4] - const Tensor &scores_slice, // [N, 1] + const phi::DenseTensor &im_info_slice, + const phi::DenseTensor &anchors, + const phi::DenseTensor &variances, + const phi::DenseTensor &bbox_deltas_slice, // [M, 4] + const phi::DenseTensor &scores_slice, // [N, 1] int pre_nms_top_n, int post_nms_top_n, float nms_thresh, @@ -194,7 +192,7 @@ class GenerateProposalsKernel : public framework::OpKernel { auto *scores_data = scores_slice.data(); // Sort index - Tensor index_t; + phi::DenseTensor index_t; index_t.Resize({scores_slice.numel()}); int *index = index_t.mutable_data(ctx.GetPlace()); for (int i = 0; i < scores_slice.numel(); ++i) { @@ -212,7 +210,7 @@ class GenerateProposalsKernel : public framework::OpKernel { index_t.Resize({pre_nms_top_n}); } - Tensor scores_sel, bbox_sel, anchor_sel, var_sel; + phi::DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel; scores_sel.mutable_data({index_t.numel(), 1}, ctx.GetPlace()); bbox_sel.mutable_data({index_t.numel(), 4}, ctx.GetPlace()); anchor_sel.mutable_data({index_t.numel(), 4}, ctx.GetPlace()); @@ -223,26 +221,26 @@ class GenerateProposalsKernel : public framework::OpKernel { phi::funcs::CPUGather(ctx, anchors, index_t, &anchor_sel); phi::funcs::CPUGather(ctx, variances, index_t, &var_sel); - Tensor proposals; + phi::DenseTensor proposals; proposals.mutable_data({index_t.numel(), 4}, ctx.GetPlace()); BoxCoder(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals); ClipTiledBoxes(ctx, 
im_info_slice, proposals, &proposals, false); - Tensor keep; + phi::DenseTensor keep; FilterBoxes(ctx, &proposals, min_size, im_info_slice, true, &keep); // Handle the case when there is no keep index left if (keep.numel() == 0) { phi::funcs::SetConstant set_zero; bbox_sel.mutable_data({1, 4}, ctx.GetPlace()); set_zero(ctx, &bbox_sel, static_cast(0)); - Tensor scores_filter; + phi::DenseTensor scores_filter; scores_filter.mutable_data({1, 1}, ctx.GetPlace()); set_zero(ctx, &scores_filter, static_cast(0)); return std::make_pair(bbox_sel, scores_filter); } - Tensor scores_filter; + phi::DenseTensor scores_filter; bbox_sel.mutable_data({keep.numel(), 4}, ctx.GetPlace()); scores_filter.mutable_data({keep.numel(), 1}, ctx.GetPlace()); phi::funcs::CPUGather(ctx, proposals, keep, &bbox_sel); @@ -251,7 +249,7 @@ class GenerateProposalsKernel : public framework::OpKernel { return std::make_pair(bbox_sel, scores_filter); } - Tensor keep_nms = + phi::DenseTensor keep_nms = phi::funcs::NMS(ctx, &bbox_sel, &scores_filter, nms_thresh, eta); if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 0890ff493332c..5d7a034c28a8f 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -28,24 +28,22 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - namespace { template -static std::pair ProposalForOneImage( +static std::pair ProposalForOneImage( const phi::GPUContext &ctx, - const Tensor &im_info, - const Tensor &anchors, - const Tensor &variances, - const Tensor &bbox_deltas, // [M, 4] - const Tensor &scores, // [N, 1] + const phi::DenseTensor &im_info, + const phi::DenseTensor &anchors, + const phi::DenseTensor &variances, + const phi::DenseTensor &bbox_deltas, // [M, 4] + const phi::DenseTensor &scores, // [N, 1] int pre_nms_top_n, int post_nms_top_n, float nms_thresh, float min_size, float eta) { // 1. pre nms - Tensor scores_sort, index_sort; + phi::DenseTensor scores_sort, index_sort; SortDescending(ctx, scores, &scores_sort, &index_sort); int num = scores.numel(); int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel() @@ -54,7 +52,7 @@ static std::pair ProposalForOneImage( index_sort.Resize({pre_nms_num, 1}); // 2. box decode and clipping - Tensor proposals; + phi::DenseTensor proposals; proposals.mutable_data({pre_nms_num, 4}, ctx.GetPlace()); { @@ -68,7 +66,7 @@ static std::pair ProposalForOneImage( } // 3. filter - Tensor keep_index, keep_num_t; + phi::DenseTensor keep_index, keep_num_t; keep_index.mutable_data({pre_nms_num}, ctx.GetPlace()); keep_num_t.mutable_data({1}, ctx.GetPlace()); min_size = std::max(min_size, 1.0f); @@ -90,7 +88,7 @@ static std::pair ProposalForOneImage( ctx.Wait(); keep_index.Resize({keep_num}); - Tensor scores_filter, proposals_filter; + phi::DenseTensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { phi::funcs::SetConstant set_zero; @@ -110,13 +108,13 @@ static std::pair ProposalForOneImage( } // 4. 
nms - Tensor keep_nms; + phi::DenseTensor keep_nms; NMS(ctx, proposals_filter, keep_index, nms_thresh, &keep_nms); if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { keep_nms.Resize({post_nms_top_n}); } - Tensor scores_nms, proposals_nms; + phi::DenseTensor scores_nms, proposals_nms; proposals_nms.mutable_data({keep_nms.numel(), 4}, ctx.GetPlace()); scores_nms.mutable_data({keep_nms.numel(), 1}, ctx.GetPlace()); phi::funcs::GPUGather(ctx, proposals_filter, keep_nms, &proposals_nms); @@ -171,7 +169,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { int64_t h_bbox = bbox_dim[2]; int64_t w_bbox = bbox_dim[3]; - Tensor bbox_deltas_swap, scores_swap; + phi::DenseTensor bbox_deltas_swap, scores_swap; bbox_deltas_swap.mutable_data({num, h_bbox, w_bbox, c_bbox}, dev_ctx.GetPlace()); scores_swap.mutable_data({num, h_score, w_score, c_score}, @@ -200,14 +198,14 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { std::vector tmp_num; for (int64_t i = 0; i < num; ++i) { - Tensor im_info_slice = im_info->Slice(i, i + 1); - Tensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); - Tensor scores_slice = scores_swap.Slice(i, i + 1); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); + phi::DenseTensor scores_slice = scores_swap.Slice(i, i + 1); bbox_deltas_slice.Resize({h_bbox * w_bbox * c_bbox / 4, 4}); scores_slice.Resize({h_score * w_score * c_score, 1}); - std::pair box_score_pair = + std::pair box_score_pair = ProposalForOneImage(dev_ctx, im_info_slice, anchors, @@ -220,8 +218,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { min_size, eta); - Tensor &proposals = box_score_pair.first; - Tensor &scores = box_score_pair.second; + phi::DenseTensor &proposals = box_score_pair.first; + phi::DenseTensor &scores = box_score_pair.second; memory::Copy(place, rpn_rois_data + num_proposals * 4, diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc index 08c7a649c1e1f..0445c21b1de3b 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc @@ -29,8 +29,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class GenerateProposalsV2Op : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc index 22bba5c57ffd8..2909c333e16ac 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct IouFunction { public: @@ -182,21 +180,21 @@ class IouSimilarityMLUKernel : public framework::OpKernel { auto M = y->dims()[0]; out->mutable_data({N, M}, place); - Tensor xt(_type); - Tensor yt(_type); + phi::DenseTensor xt(_type); + phi::DenseTensor yt(_type); xt.mutable_data({4, N}, place); yt.mutable_data({4, M}, place); std::vector vec_trans = {1, 0}; F.Transpose(x, &xt, vec_trans); F.Transpose(y, &yt, vec_trans); - Tensor xmin1 = xt.Slice(0, 1); - Tensor ymin1 = xt.Slice(1, 2); - Tensor xmax1 = xt.Slice(2, 3); - Tensor ymax1 = xt.Slice(3, 4); - Tensor xmin2 = yt.Slice(0, 1); - Tensor ymin2 = yt.Slice(1, 2); - Tensor xmax2 = yt.Slice(2, 3); - Tensor ymax2 = yt.Slice(3, 4); + phi::DenseTensor xmin1 = xt.Slice(0, 1); + phi::DenseTensor ymin1 = xt.Slice(1, 2); + phi::DenseTensor xmax1 = xt.Slice(2, 3); + phi::DenseTensor ymax1 = xt.Slice(3, 4); + phi::DenseTensor xmin2 = yt.Slice(0, 1); + phi::DenseTensor ymin2 = yt.Slice(1, 2); + phi::DenseTensor xmax2 = yt.Slice(2, 3); + phi::DenseTensor ymax2 = yt.Slice(3, 4); xmin1.Resize({N, 1}); ymin1.Resize({N, 1}); xmax1.Resize({N, 1}); @@ -206,12 +204,12 @@ class IouSimilarityMLUKernel : public framework::OpKernel { xmax2.Resize({1, M}); ymax2.Resize({1, M}); - Tensor w1(_type); - Tensor h1(_type); - Tensor w2(_type); - Tensor h2(_type); - Tensor area1(_type); - Tensor area2(_type); + phi::DenseTensor w1(_type); + phi::DenseTensor h1(_type); + phi::DenseTensor w2(_type); + phi::DenseTensor h2(_type); + phi::DenseTensor area1(_type); + phi::DenseTensor area2(_type); w1.mutable_data({N, 1}, place); h1.mutable_data({N, 1}, place); w2.mutable_data({1, M}, place); @@ -231,10 +229,10 @@ class IouSimilarityMLUKernel : public framework::OpKernel { F.Mul(&w1, &h1, &area1); F.Mul(&w2, &h2, &area2); - Tensor inter_xmax(_type); - Tensor inter_ymax(_type); - Tensor inter_xmin(_type); - Tensor inter_ymin(_type); + phi::DenseTensor inter_xmax(_type); + phi::DenseTensor inter_ymax(_type); + phi::DenseTensor inter_xmin(_type); + phi::DenseTensor inter_ymin(_type); inter_xmax.mutable_data({N, M}, place); inter_ymax.mutable_data({N, M}, place); inter_xmin.mutable_data({N, M}, place); @@ -244,8 +242,8 @@ class IouSimilarityMLUKernel : public framework::OpKernel { F.Maximum(&xmin1, &xmin2, &inter_xmin); F.Maximum(&ymin1, &ymin2, &inter_ymin); - Tensor inter_w(_type); - Tensor inter_h(_type); + phi::DenseTensor inter_w(_type); + phi::DenseTensor inter_h(_type); inter_w.mutable_data({N, M}, place); inter_h.mutable_data({N, M}, place); F.Sub(&inter_xmax, &inter_xmin, &inter_w); @@ -255,14 +253,14 @@ class IouSimilarityMLUKernel : public framework::OpKernel { F.Adds(&inter_w, 1.0f, &inter_w); F.Adds(&inter_h, 1.0f, &inter_h); } - Tensor zeros(_type); + phi::DenseTensor zeros(_type); zeros.mutable_data({1}, place); FillMLUTensorWithHostValue(ctx, static_cast(0), &zeros); F.Maximum(&inter_w, &zeros, &inter_w); F.Maximum(&inter_h, &zeros, &inter_h); F.Mul(&inter_w, &inter_h, out); - Tensor union_area(_type); + phi::DenseTensor union_area(_type); union_area.mutable_data({N, M}, place); F.Add(&area1, &area2, &union_area); F.Sub(&union_area, out, &union_area); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc index 5708d1ae6460a..7bdd105c37ae0 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc @@ -18,8 +18,6 @@ 
limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct IouFunction { public: @@ -108,21 +106,21 @@ class IouSimilarityNPUKernel : public framework::OpKernel { auto M = y->dims()[0]; out->mutable_data({N, M}, place); - Tensor xt(_type); - Tensor yt(_type); + phi::DenseTensor xt(_type); + phi::DenseTensor yt(_type); xt.mutable_data({4, N}, place); yt.mutable_data({4, M}, place); std::vector vec_trans = {1, 0}; F.Transpose(x, &xt, vec_trans); F.Transpose(y, &yt, vec_trans); - Tensor xmin1 = xt.Slice(0, 1); - Tensor ymin1 = xt.Slice(1, 2); - Tensor xmax1 = xt.Slice(2, 3); - Tensor ymax1 = xt.Slice(3, 4); - Tensor xmin2 = yt.Slice(0, 1); - Tensor ymin2 = yt.Slice(1, 2); - Tensor xmax2 = yt.Slice(2, 3); - Tensor ymax2 = yt.Slice(3, 4); + phi::DenseTensor xmin1 = xt.Slice(0, 1); + phi::DenseTensor ymin1 = xt.Slice(1, 2); + phi::DenseTensor xmax1 = xt.Slice(2, 3); + phi::DenseTensor ymax1 = xt.Slice(3, 4); + phi::DenseTensor xmin2 = yt.Slice(0, 1); + phi::DenseTensor ymin2 = yt.Slice(1, 2); + phi::DenseTensor xmax2 = yt.Slice(2, 3); + phi::DenseTensor ymax2 = yt.Slice(3, 4); xmin1.Resize({N, 1}); ymin1.Resize({N, 1}); xmax1.Resize({N, 1}); @@ -132,12 +130,12 @@ class IouSimilarityNPUKernel : public framework::OpKernel { xmax2.Resize({1, M}); ymax2.Resize({1, M}); - Tensor w1(_type); - Tensor h1(_type); - Tensor w2(_type); - Tensor h2(_type); - Tensor area1(_type); - Tensor area2(_type); + phi::DenseTensor w1(_type); + phi::DenseTensor h1(_type); + phi::DenseTensor w2(_type); + phi::DenseTensor h2(_type); + phi::DenseTensor area1(_type); + phi::DenseTensor area2(_type); w1.mutable_data({N, 1}, place); h1.mutable_data({N, 1}, place); w2.mutable_data({1, M}, place); @@ -157,10 +155,10 @@ class IouSimilarityNPUKernel : public framework::OpKernel { F.Mul(&w1, &h1, &area1); F.Mul(&w2, &h2, &area2); - Tensor inter_xmax(_type); - Tensor inter_ymax(_type); - Tensor inter_xmin(_type); - Tensor inter_ymin(_type); + phi::DenseTensor inter_xmax(_type); + phi::DenseTensor inter_ymax(_type); + phi::DenseTensor inter_xmin(_type); + phi::DenseTensor inter_ymin(_type); inter_xmax.mutable_data({N, M}, place); inter_ymax.mutable_data({N, M}, place); inter_xmin.mutable_data({N, M}, place); @@ -170,8 +168,8 @@ class IouSimilarityNPUKernel : public framework::OpKernel { F.Maximum(&xmin1, &xmin2, &inter_xmin); F.Maximum(&ymin1, &ymin2, &inter_ymin); - Tensor inter_w(_type); - Tensor inter_h(_type); + phi::DenseTensor inter_w(_type); + phi::DenseTensor inter_h(_type); inter_w.mutable_data({N, M}, place); inter_h.mutable_data({N, M}, place); F.Sub(&inter_xmax, &inter_xmin, &inter_w); @@ -181,14 +179,14 @@ class IouSimilarityNPUKernel : public framework::OpKernel { F.Adds(&inter_w, 1.0f, &inter_w); F.Adds(&inter_h, 1.0f, &inter_h); } - Tensor zeros(_type); + phi::DenseTensor zeros(_type); zeros.mutable_data({1}, place); FillNpuTensorWithConstant(&zeros, static_cast(0)); F.Maximum(&inter_w, &zeros, &inter_w); F.Maximum(&inter_h, &zeros, &inter_h); F.Mul(&inter_w, &inter_h, out); - Tensor union_area(_type); + phi::DenseTensor union_area(_type); union_area.mutable_data({N, M}, place); F.Add(&area1, &area2, &union_area); F.Sub(&union_area, out, &union_area); diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc index c2b8833bbd96c..1c5135fc4e8a7 100644 --- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc +++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc @@ 
-19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class LocalityAwareNMSOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -252,7 +250,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel { int num_det = 0; int64_t class_num = scores->dims()[0]; - Tensor bbox_slice, score_slice; + phi::DenseTensor bbox_slice, score_slice; for (int64_t c = 0; c < class_num; ++c) { if (c == background_label) continue; @@ -325,7 +323,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel { auto* bboxes_data = bboxes.data(); auto* odata = outs->data(); const T* sdata; - Tensor bbox; + phi::DenseTensor bbox; bbox.Resize({scores.dims()[0], box_size}); int count = 0; for (const auto& it : selected_indices) { @@ -370,7 +368,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel { int64_t box_dim = boxes.dims()[2]; int64_t out_dim = box_dim + 2; int num_nmsed_out = 0; - Tensor boxes_slice, scores_slice; + phi::DenseTensor boxes_slice, scores_slice; int n = batch_size; for (int i = 0; i < n; ++i) { scores_slice = scores.Slice(i, i + 1); @@ -407,7 +405,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel { int64_t s = batch_starts[i]; int64_t e = batch_starts[i + 1]; if (e > s) { - Tensor out = outs->Slice(s, e); + phi::DenseTensor out = outs->Slice(s, e); LocalityAwareNMSOutput(dev_ctx, scores_slice, boxes_slice, diff --git a/paddle/fluid/operators/detection/matrix_nms_op.cc b/paddle/fluid/operators/detection/matrix_nms_op.cc index 21e52a39c37ab..1beeaf1ba3356 100644 --- a/paddle/fluid/operators/detection/matrix_nms_op.cc +++ b/paddle/fluid/operators/detection/matrix_nms_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class MatrixNMSOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 5af93551d786f..79077b3086671 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - inline std::vector GetNmsLodFromRoisNum( const phi::DenseTensor* rois_num) { std::vector rois_lod; @@ -228,7 +226,7 @@ class MultiClassNMSKernel : public framework::OpKernel { int num_det = 0; int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; - Tensor bbox_slice, score_slice; + phi::DenseTensor bbox_slice, score_slice; for (int64_t c = 0; c < class_num; ++c) { if (c == background_label) continue; if (scores_size == 3) { @@ -319,7 +317,7 @@ class MultiClassNMSKernel : public framework::OpKernel { auto* bboxes_data = bboxes.data(); auto* odata = outs->data(); const T* sdata; - Tensor bbox; + phi::DenseTensor bbox; bbox.Resize({scores.dims()[0], box_size}); int count = 0; for (const auto& it : selected_indices) { @@ -373,7 +371,7 @@ class MultiClassNMSKernel : public framework::OpKernel { int64_t box_dim = boxes->dims()[2]; int64_t out_dim = box_dim + 2; int num_nmsed_out = 0; - Tensor boxes_slice, scores_slice; + phi::DenseTensor boxes_slice, scores_slice; int n = 0; if (has_roisnum) { n = score_size == 3 ? 
batch_size : rois_num->numel(); @@ -449,7 +447,7 @@ class MultiClassNMSKernel : public framework::OpKernel { int64_t s = batch_starts[i]; int64_t e = batch_starts[i + 1]; if (e > s) { - Tensor out = outs->Slice(s, e); + phi::DenseTensor out = outs->Slice(s, e); if (return_index) { int* output_idx = index->mutable_data({num_kept, 1}, ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index e386465c3bdf6..7135853f9ff8b 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -17,8 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PolygonBoxTransformCPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu index bbeb9f7f2858a..de43f2d62b455 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cu +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using phi::PADDLE_CUDA_NUM_THREADS; #define CUDA_BLOCK_SIZE 16 diff --git a/paddle/fluid/operators/detection/prior_box_op_npu.cc b/paddle/fluid/operators/detection/prior_box_op_npu.cc index 8a3a313be159c..42845ff20f129 100644 --- a/paddle/fluid/operators/detection/prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/prior_box_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PriorBoxNPUKernel : public framework::OpKernel { public: @@ -50,7 +48,7 @@ class PriorBoxNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); - Tensor out(input->type()); + phi::DenseTensor out(input->type()); auto out_dims = phi::vectorize(boxes->dims()); out_dims.insert(out_dims.begin(), 2); out.Resize(phi::make_ddim(out_dims)); @@ -75,8 +73,8 @@ class PriorBoxNPUKernel : public framework::OpKernel { runner.Run(stream); out.Resize(phi::make_ddim({out.numel()})); - Tensor out_boxes = out.Slice(0, boxes->numel()); - Tensor out_variances = out.Slice(boxes->numel(), out.numel()); + phi::DenseTensor out_boxes = out.Slice(0, boxes->numel()); + phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel()); out_boxes.Resize(boxes->dims()); out_variances.Resize(variances->dims()); diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc index a38765e28d786..d2654e086d08d 100644 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc @@ -18,8 +18,6 @@ limitations under the License. 
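A standalone sketch (plain C++, not the operator code above) of the "batch_starts" bookkeeping the NMS kernels use: per-image detection counts are prefix-summed into offsets, and image i then owns output rows [batch_starts[i], batch_starts[i + 1]). The counts below are hypothetical.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Hypothetical per-image numbers of kept detections.
  std::vector<int64_t> num_kept_per_image = {3, 0, 5};

  std::vector<int64_t> batch_starts = {0};
  for (int64_t n : num_kept_per_image) {
    batch_starts.push_back(batch_starts.back() + n);
  }

  // An empty range (e == s) means the image produced no detections,
  // which is why the kernels guard the Slice with "if (e > s)".
  for (size_t i = 0; i + 1 < batch_starts.size(); ++i) {
    int64_t s = batch_starts[i];
    int64_t e = batch_starts[i + 1];
    if (e > s) {
      std::cout << "image " << i << " -> rows [" << s << ", " << e << ")\n";
    }
  }
  return 0;
}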
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -409,9 +407,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { } void RetinanetDetectionOutput(const framework::ExecutionContext& ctx, - const std::vector& scores, - const std::vector& bboxes, - const std::vector& anchors, + const std::vector& scores, + const std::vector& bboxes, + const std::vector& anchors, const phi::DenseTensor& im_info, std::vector>* nmsed_out, int* num_nmsed_out) const { @@ -425,11 +423,11 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { std::map>> preds; for (size_t l = 0; l < scores.size(); ++l) { // Fetch per level score - Tensor scores_per_level = scores[l]; + phi::DenseTensor scores_per_level = scores[l]; // Fetch per level bbox - Tensor bboxes_per_level = bboxes[l]; + phi::DenseTensor bboxes_per_level = bboxes[l]; // Fetch per level anchor - Tensor anchors_per_level = anchors[l]; + phi::DenseTensor anchors_per_level = anchors[l]; int64_t scores_num = scores_per_level.numel(); int64_t bboxes_num = bboxes_per_level.numel(); @@ -492,9 +490,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { auto* im_info = ctx.Input("ImInfo"); auto* outs = ctx.Output("Out"); - std::vector boxes_list(boxes.size()); - std::vector scores_list(scores.size()); - std::vector anchors_list(anchors.size()); + std::vector boxes_list(boxes.size()); + std::vector scores_list(scores.size()); + std::vector anchors_list(anchors.size()); for (size_t j = 0; j < boxes_list.size(); ++j) { boxes_list[j] = *boxes[j]; scores_list[j] = *scores[j]; @@ -512,8 +510,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { std::vector batch_starts = {0}; for (int i = 0; i < batch_size; ++i) { int num_nmsed_out = 0; - std::vector box_per_batch_list(boxes_list.size()); - std::vector score_per_batch_list(scores_list.size()); + std::vector box_per_batch_list(boxes_list.size()); + std::vector score_per_batch_list(scores_list.size()); for (size_t j = 0; j < boxes_list.size(); ++j) { const auto& score_dims = scores_list[j].dims(); score_per_batch_list[j] = scores_list[j].Slice(i, i + 1); @@ -521,7 +519,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { box_per_batch_list[j] = boxes_list[j].Slice(i, i + 1); box_per_batch_list[j].Resize({score_dims[1], box_dim}); } - Tensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); std::vector> nmsed_out; RetinanetDetectionOutput(ctx, @@ -544,7 +542,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { int64_t s = batch_starts[i]; int64_t e = batch_starts[i + 1]; if (e > s) { - Tensor out = outs->Slice(s, e); + phi::DenseTensor out = outs->Slice(s, e); MultiClassOutput(dev_ctx, all_nmsed_out[i], &out); } } @@ -563,7 +561,8 @@ class RetinanetDetectionOutputOpMaker void Make() override { AddInput("BBoxes", "(List) A list of tensors from multiple FPN levels. Each " - "element is a 3-D Tensor with shape [N, Mi, 4] represents the " + "element is a 3-D phi::DenseTensor with shape [N, Mi, 4] " + "represents the " "predicted locations of Mi bounding boxes, N is the batch size. " "Mi is the number of bounding boxes from i-th FPN level. 
Each " "bounding box has four coordinate values and the layout is " @@ -571,18 +570,20 @@ class RetinanetDetectionOutputOpMaker .AsDuplicable(); AddInput("Scores", "(List) A list of tensors from multiple FPN levels. Each " - "element is a 3-D Tensor with shape [N, Mi, C] represents the " + "element is a 3-D phi::DenseTensor with shape [N, Mi, C] " + "represents the " "predicted confidence from its FPN level. N is the batch size, " "C is the class number (excluding background), Mi is the number " "of bounding boxes from i-th FPN level. For each bounding box, " "there are total C scores.") .AsDuplicable(); - AddInput("Anchors", - "(List) A list of tensors from multiple FPN levels. Each" - "element is a 2-D Tensor with shape [Mi, 4] represents the " - "locations of Mi anchor boxes from i-th FPN level. Each " - "bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax].") + AddInput( + "Anchors", + "(List) A list of tensors from multiple FPN levels. Each" + "element is a 2-D phi::DenseTensor with shape [Mi, 4] represents the " + "locations of Mi anchor boxes from i-th FPN level. Each " + "bounding box has four coordinate values and the layout is " + "[xmin, ymin, xmax, ymax].") .AsDuplicable(); AddInput("ImInfo", "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] " diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index ff4c1159119e3..9ba51850ebaaa 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template bool GT_E(T a, T b) { return (a > b) || fabs(a - b) < 1e-4; @@ -600,7 +598,7 @@ class ROIPerspectiveTransformOpMaker public: void Make() override { AddInput("X", - "(Tensor), " + "(phi::DenseTensor), " "the input of ROIPerspectiveTransformOp. " "The format of input tensor is NCHW. Where N is batch size, " "C is the number of input channels, " @@ -617,28 +615,28 @@ class ROIPerspectiveTransformOpMaker "(x4, y4) is the bottom left coordinates."); AddOutput( "Out", - "(Tensor), " + "(phi::DenseTensor), " "The output of ROIPerspectiveTransformOp is a 4-D tensor with shape " "(num_rois, channels, transformed_h, transformed_w)."); AddOutput("Mask", - "(Tensor), " + "(phi::DenseTensor), " "The output mask of ROIPerspectiveTransformOp is a 4-D tensor " "with shape " "(num_rois, 1, transformed_h, transformed_w)."); AddOutput("TransformMatrix", - "(Tensor), " + "(phi::DenseTensor), " "The output transform matrix of ROIPerspectiveTransformOp is a " "1-D tensor with shape " "(num_rois, 9)."); AddOutput("Out2InIdx", - "(Tensor), " + "(phi::DenseTensor), " "An intermediate tensor used to map indexes of input feature map " "and indexes of output feature map." "The shape of the tensor is [out_size, 4] and out_size is the " "number of elements in output feature map.") .AsIntermediate(); AddOutput("Out2InWeights", - "(Tensor), " + "(phi::DenseTensor), " "An intermediate tensor used to record the weights of bilinear " "interpolatein for each element in output. 
The shape of the " "tensor is [out_size, 4] and out_size is the number of elements " diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index f73ddf9a09e6e..ba7fe51383822 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template @@ -113,11 +112,12 @@ void AppendRpns(phi::DenseTensor* out, } template -std::vector FilterStraddleAnchor(const phi::CPUContext& context, - const phi::DenseTensor* anchor, - const float rpn_straddle_thresh, - T im_height, - T im_width) { +std::vector FilterStraddleAnchor( + const phi::CPUContext& context, + const phi::DenseTensor* anchor, + const float rpn_straddle_thresh, + T im_height, + T im_width) { std::vector inds_inside; int anchor_num = anchor->dims()[0]; auto* anchor_data = anchor->data(); @@ -138,25 +138,25 @@ std::vector FilterStraddleAnchor(const phi::CPUContext& context, } } int inside_num = inds_inside.size(); - Tensor inds_inside_t; + phi::DenseTensor inds_inside_t; int* inds_inside_data = inds_inside_t.mutable_data({inside_num}, context.GetPlace()); std::copy(inds_inside.begin(), inds_inside.end(), inds_inside_data); - Tensor inside_anchor_t; + phi::DenseTensor inside_anchor_t; T* inside_anchor_data = inside_anchor_t.mutable_data({inside_num, 4}, context.GetPlace()); Gather( anchor->data(), 4, inds_inside_data, inside_num, inside_anchor_data); - std::vector res; + std::vector res; res.emplace_back(inds_inside_t); res.emplace_back(inside_anchor_t); return res; } template -Tensor FilterCrowdGt(const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* is_crowd) { +phi::DenseTensor FilterCrowdGt(const phi::CPUContext& context, + phi::DenseTensor* gt_boxes, + phi::DenseTensor* is_crowd) { int gt_num = gt_boxes->dims()[0]; std::vector not_crowd_inds; auto* is_crowd_data = is_crowd->data(); @@ -166,7 +166,7 @@ Tensor FilterCrowdGt(const phi::CPUContext& context, } } int ncrowd_num = not_crowd_inds.size(); - Tensor ncrowd_gt_boxes; + phi::DenseTensor ncrowd_gt_boxes; T* ncrowd_gt_boxes_data = ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); Gather(gt_boxes->data(), @@ -300,7 +300,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, } template -std::vector SampleRpnFgBgGt( +std::vector SampleRpnFgBgGt( const phi::CPUContext& ctx, const phi::DenseTensor& anchor_by_gt_overlap, const int rpn_batch_size_per_im, @@ -322,7 +322,7 @@ std::vector SampleRpnFgBgGt( // Calculate the max IoU between anchors and gt boxes // Map from anchor to gt box that has highest overlap auto place = ctx.GetPlace(); - Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; + phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; anchor_to_gt_max.mutable_data({anchor_num}, place); int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); gt_to_anchor_max.mutable_data({gt_num}, place); @@ -365,7 +365,8 @@ std::vector SampleRpnFgBgGt( for (int i = 0; i < fg_fake_num; ++i) { gt_inds.emplace_back(argmax[fg_fake[i]]); } - Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t; + phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, + bbox_inside_weight_t; int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); int* score_index_data = score_index_t.mutable_data({fg_num + 
bg_num}, place); @@ -381,7 +382,7 @@ std::vector SampleRpnFgBgGt( std::copy(bbox_inside_weight.begin(), bbox_inside_weight.end(), bbox_inside_weight_data); - std::vector loc_score_tgtlbl_gt; + std::vector loc_score_tgtlbl_gt; loc_score_tgtlbl_gt.emplace_back(loc_index_t); loc_score_tgtlbl_gt.emplace_back(score_index_t); loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); @@ -455,30 +456,30 @@ class RpnTargetAssignKernel : public framework::OpKernel { auto gt_boxes_lod = gt_boxes->lod().back(); auto is_crowd_lod = is_crowd->lod().back(); for (int i = 0; i < batch_num; ++i) { - Tensor gt_boxes_slice = + phi::DenseTensor gt_boxes_slice = gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); - Tensor is_crowd_slice = + phi::DenseTensor is_crowd_slice = is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); - Tensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); auto* im_info_data = im_info_slice.data(); auto im_height = im_info_data[0]; auto im_width = im_info_data[1]; auto im_scale = im_info_data[2]; // Filter straddle anchor - std::vector filter_output = FilterStraddleAnchor( + std::vector filter_output = FilterStraddleAnchor( dev_ctx, anchor, rpn_straddle_thresh, im_height, im_width); - Tensor inds_inside = filter_output[0]; - Tensor inside_anchor = filter_output[1]; + phi::DenseTensor inds_inside = filter_output[0]; + phi::DenseTensor inside_anchor = filter_output[1]; // Filter crowd gt - Tensor ncrowd_gt_boxes = + phi::DenseTensor ncrowd_gt_boxes = FilterCrowdGt(dev_ctx, >_boxes_slice, &is_crowd_slice); auto ncrowd_gt_boxes_et = framework::EigenTensor::From(ncrowd_gt_boxes); ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - Tensor anchor_by_gt_overlap; + phi::DenseTensor anchor_by_gt_overlap; anchor_by_gt_overlap.mutable_data( {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); @@ -492,16 +493,16 @@ class RpnTargetAssignKernel : public framework::OpKernel { engine, use_random); - Tensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; - Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; + phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; + phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; + phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; + phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; + phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; int loc_num = sampled_loc_index.dims()[0]; int score_num = sampled_score_index.dims()[0]; // unmap to all anchor - Tensor sampled_loc_index_unmap, sampled_score_index_unmap; + phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; sampled_loc_index_unmap.mutable_data({loc_num}, place); sampled_score_index_unmap.mutable_data({score_num}, place); Gather(inds_inside.data(), @@ -516,7 +517,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { sampled_score_index_unmap.data()); // get target bbox deltas - Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox; + phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; auto* sampled_anchor_data = sampled_anchor.mutable_data({loc_num, 4}, place); auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); @@ -859,10 +860,11 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { }; template -std::vector 
FilterCrowdGtBoxLabel(const phi::CPUContext& context, - phi::DenseTensor* gt_boxes, - phi::DenseTensor* gt_labels, - phi::DenseTensor* is_crowd) { +std::vector FilterCrowdGtBoxLabel( + const phi::CPUContext& context, + phi::DenseTensor* gt_boxes, + phi::DenseTensor* gt_labels, + phi::DenseTensor* is_crowd) { int gt_num = gt_boxes->dims()[0]; std::vector not_crowd_inds; auto* is_crowd_data = is_crowd->data(); @@ -872,7 +874,7 @@ std::vector FilterCrowdGtBoxLabel(const phi::CPUContext& context, } } int ncrowd_num = not_crowd_inds.size(); - Tensor ncrowd_gt_boxes, ncrowd_gt_labels; + phi::DenseTensor ncrowd_gt_boxes, ncrowd_gt_labels; T* ncrowd_gt_boxes_data = ncrowd_gt_boxes.mutable_data({ncrowd_num, 4}, context.GetPlace()); int* ncrowd_gt_labels_data = @@ -887,19 +889,20 @@ std::vector FilterCrowdGtBoxLabel(const phi::CPUContext& context, not_crowd_inds.data(), ncrowd_num, ncrowd_gt_labels_data); - std::vector res; + std::vector res; res.emplace_back(ncrowd_gt_boxes); res.emplace_back(ncrowd_gt_labels); return res; } template -std::vector GetAllFgBgGt(const phi::CPUContext& ctx, - const phi::DenseTensor& anchor_by_gt_overlap, - const phi::DenseTensor& ncrowd_gt_labels, - const float positive_overlap, - const float negative_overlap, - std::minstd_rand engine) { +std::vector GetAllFgBgGt( + const phi::CPUContext& ctx, + const phi::DenseTensor& anchor_by_gt_overlap, + const phi::DenseTensor& ncrowd_gt_labels, + const float positive_overlap, + const float negative_overlap, + std::minstd_rand engine) { auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); int anchor_num = anchor_by_gt_overlap.dims()[0]; int gt_num = anchor_by_gt_overlap.dims()[1]; @@ -913,7 +916,7 @@ std::vector GetAllFgBgGt(const phi::CPUContext& ctx, // Calculate the max IoU between anchors and gt boxes // Map from anchor to gt box that has highest overlap auto place = ctx.GetPlace(); - Tensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; + phi::DenseTensor anchor_to_gt_max, anchor_to_gt_argmax, gt_to_anchor_max; anchor_to_gt_max.mutable_data({anchor_num}, place); int* argmax = anchor_to_gt_argmax.mutable_data({anchor_num}, place); gt_to_anchor_max.mutable_data({gt_num}, place); @@ -961,8 +964,9 @@ std::vector GetAllFgBgGt(const phi::CPUContext& ctx, gt_inds.emplace_back(argmax[fg_fake[i]]); } - Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t; - Tensor fg_num_t; + phi::DenseTensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, + bbox_inside_weight_t; + phi::DenseTensor fg_num_t; int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); int* score_index_data = score_index_t.mutable_data({fg_num + bg_num}, place); @@ -980,7 +984,7 @@ std::vector GetAllFgBgGt(const phi::CPUContext& ctx, bbox_inside_weight.end(), bbox_inside_weight_data); fg_num_data[0] = fg_fake.size() + 1; - std::vector loc_score_tgtlbl_gt; + std::vector loc_score_tgtlbl_gt; loc_score_tgtlbl_gt.emplace_back(loc_index_t); loc_score_tgtlbl_gt.emplace_back(score_index_t); loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); @@ -1065,35 +1069,35 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { auto gt_labels_lod = gt_labels->lod().back(); auto is_crowd_lod = is_crowd->lod().back(); for (int i = 0; i < batch_num; ++i) { - Tensor gt_boxes_slice = + phi::DenseTensor gt_boxes_slice = gt_boxes->Slice(gt_boxes_lod[i], gt_boxes_lod[i + 1]); - Tensor gt_labels_slice = + phi::DenseTensor gt_labels_slice = gt_labels->Slice(gt_labels_lod[i], gt_labels_lod[i + 1]); - Tensor is_crowd_slice = + 
phi::DenseTensor is_crowd_slice = is_crowd->Slice(is_crowd_lod[i], is_crowd_lod[i + 1]); - Tensor im_info_slice = im_info->Slice(i, i + 1); + phi::DenseTensor im_info_slice = im_info->Slice(i, i + 1); auto* im_info_data = im_info_slice.data(); auto im_height = im_info_data[0]; auto im_width = im_info_data[1]; auto im_scale = im_info_data[2]; // Filter straddle anchor - std::vector filter_output = + std::vector filter_output = FilterStraddleAnchor(dev_ctx, anchor, -1, im_height, im_width); - Tensor inds_inside = filter_output[0]; - Tensor inside_anchor = filter_output[1]; + phi::DenseTensor inds_inside = filter_output[0]; + phi::DenseTensor inside_anchor = filter_output[1]; // Filter crowd gt - std::vector ncrowd_output = FilterCrowdGtBoxLabel( + std::vector ncrowd_output = FilterCrowdGtBoxLabel( dev_ctx, >_boxes_slice, >_labels_slice, &is_crowd_slice); - Tensor ncrowd_gt_boxes = ncrowd_output[0]; - Tensor ncrowd_gt_labels = ncrowd_output[1]; + phi::DenseTensor ncrowd_gt_boxes = ncrowd_output[0]; + phi::DenseTensor ncrowd_gt_labels = ncrowd_output[1]; auto ncrowd_gt_boxes_et = framework::EigenTensor::From(ncrowd_gt_boxes); ncrowd_gt_boxes_et = ncrowd_gt_boxes_et * im_scale; - Tensor anchor_by_gt_overlap; + phi::DenseTensor anchor_by_gt_overlap; anchor_by_gt_overlap.mutable_data( {inside_anchor.dims()[0], ncrowd_gt_boxes.dims()[0]}, place); BboxOverlaps(inside_anchor, ncrowd_gt_boxes, &anchor_by_gt_overlap); @@ -1105,17 +1109,17 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { negative_overlap, engine); - Tensor sampled_loc_index = loc_score_tgtlbl_gt[0]; - Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; - Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; - Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; - Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; - Tensor sampled_fg_num = loc_score_tgtlbl_gt[5]; + phi::DenseTensor sampled_loc_index = loc_score_tgtlbl_gt[0]; + phi::DenseTensor sampled_score_index = loc_score_tgtlbl_gt[1]; + phi::DenseTensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; + phi::DenseTensor sampled_gt_index = loc_score_tgtlbl_gt[3]; + phi::DenseTensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; + phi::DenseTensor sampled_fg_num = loc_score_tgtlbl_gt[5]; int loc_num = sampled_loc_index.dims()[0]; int score_num = sampled_score_index.dims()[0]; // unmap to all anchor - Tensor sampled_loc_index_unmap, sampled_score_index_unmap; + phi::DenseTensor sampled_loc_index_unmap, sampled_score_index_unmap; sampled_loc_index_unmap.mutable_data({loc_num}, place); sampled_score_index_unmap.mutable_data({score_num}, place); Gather(inds_inside.data(), @@ -1130,7 +1134,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel { sampled_score_index_unmap.data()); // get target bbox deltas - Tensor sampled_anchor, sampled_gt, sampled_tgt_bbox; + phi::DenseTensor sampled_anchor, sampled_gt, sampled_tgt_bbox; auto* sampled_anchor_data = sampled_anchor.mutable_data({loc_num, 4}, place); auto* sampled_gt_data = sampled_gt.mutable_data({loc_num, 4}, place); diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu index 56d28c20dc8e7..6ff2e9c65d856 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu @@ -19,8 +19,6 @@ limitations under the License. 
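The RPN/Retinanet target-assign code above reduces an [anchor_num x gt_num] IoU matrix into per-anchor max/argmax and per-gt max before sampling. A minimal standalone sketch of that reduction, assuming a row-major overlaps buffer (the operator itself uses Eigen reductions):

#include <vector>

struct OverlapStats {
  std::vector<float> anchor_to_gt_max;
  std::vector<int> anchor_to_gt_argmax;
  std::vector<float> gt_to_anchor_max;
};

// overlaps is row-major with shape [anchor_num, gt_num].
OverlapStats ReduceOverlaps(const std::vector<float>& overlaps,
                            int anchor_num, int gt_num) {
  OverlapStats s;
  s.anchor_to_gt_max.assign(anchor_num, -1.0f);
  s.anchor_to_gt_argmax.assign(anchor_num, 0);
  s.gt_to_anchor_max.assign(gt_num, -1.0f);
  for (int a = 0; a < anchor_num; ++a) {
    for (int g = 0; g < gt_num; ++g) {
      float v = overlaps[a * gt_num + g];
      if (v > s.anchor_to_gt_max[a]) {   // best gt box for this anchor
        s.anchor_to_gt_max[a] = v;
        s.anchor_to_gt_argmax[a] = g;
      }
      if (v > s.gt_to_anchor_max[g]) {   // best anchor for this gt box
        s.gt_to_anchor_max[g] = v;
      }
    }
  }
  return s;
}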
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; @@ -123,10 +121,10 @@ template class GPUSigmoidFocalLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - Tensor *Out = context.Output("Out"); + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Labels = context.Input("Label"); + const phi::DenseTensor *FgNum = context.Input("FgNum"); + phi::DenseTensor *Out = context.Output("Out"); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); auto x_dims = X->dims(); @@ -154,12 +152,13 @@ template class GPUSigmoidFocalLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - const Tensor *dOut = + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Labels = context.Input("Label"); + const phi::DenseTensor *FgNum = context.Input("FgNum"); + const phi::DenseTensor *dOut = context.Input(framework::GradVarName("Out")); - Tensor *dX = context.Output(framework::GradVarName("X")); + phi::DenseTensor *dX = + context.Output(framework::GradVarName("X")); auto dx_data = dX->mutable_data(context.GetPlace()); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h index b7c77a5e28222..0632e5ab8fab0 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h @@ -22,16 +22,14 @@ limitations under the License. 
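A scalar reference for the sigmoid focal loss computed by the kernels above, written as a sketch of the standard formulation with fg_num normalization; the exact label and indexing conventions inside the CUDA/CPU kernels are simplified here.

#include <algorithm>
#include <cmath>

// x: raw logit for one class at one location; is_positive: whether the
// location is labeled with that class; fg_num: foreground-sample normalizer.
float SigmoidFocalLoss(float x, bool is_positive, int fg_num,
                       float gamma = 2.0f, float alpha = 0.25f) {
  float p = 1.0f / (1.0f + std::exp(-x));  // sigmoid probability
  float norm = static_cast<float>(fg_num > 0 ? fg_num : 1);
  if (is_positive) {
    // easy positives are down-weighted by (1 - p)^gamma
    return -alpha * std::pow(1.0f - p, gamma) *
           std::log(std::max(p, 1e-12f)) / norm;
  }
  // easy negatives are down-weighted by p^gamma
  return -(1.0f - alpha) * std::pow(p, gamma) *
         std::log(std::max(1.0f - p, 1e-12f)) / norm;
}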
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SigmoidFocalLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - Tensor *Out = context.Output("Out"); + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Labels = context.Input("Label"); + const phi::DenseTensor *FgNum = context.Input("FgNum"); + phi::DenseTensor *Out = context.Output("Out"); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); auto out_data = Out->mutable_data(context.GetPlace()); @@ -79,12 +77,13 @@ template class SigmoidFocalLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - const Tensor *dOut = + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Labels = context.Input("Label"); + const phi::DenseTensor *FgNum = context.Input("FgNum"); + const phi::DenseTensor *dOut = context.Input(framework::GradVarName("Out")); - Tensor *dX = context.Output(framework::GradVarName("X")); + phi::DenseTensor *dX = + context.Output(framework::GradVarName("X")); auto dx_data = dX->mutable_data(context.GetPlace()); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); diff --git a/paddle/fluid/operators/detection/yolo_box_op_mlu.cc b/paddle/fluid/operators/detection/yolo_box_op_mlu.cc index 739c05805d68a..aac3369381e95 100644 --- a/paddle/fluid/operators/detection/yolo_box_op_mlu.cc +++ b/paddle/fluid/operators/detection/yolo_box_op_mlu.cc @@ -77,7 +77,7 @@ class YoloBoxMLUKernel : public framework::OpKernel { MLUOpTensorDesc x_desc(*x, MLUOP_LAYOUT_ARRAY, ToMluOpDataType()); MLUOpTensorDesc img_size_desc( *img_size, MLUOP_LAYOUT_ARRAY, ToMluOpDataType()); - Tensor anchors_temp(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor anchors_temp(framework::TransToPhiDataType(VT::INT32)); anchors_temp.Resize({size}); paddle::framework::TensorFromVector( anchors, ctx.device_context(), &anchors_temp); diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 5d3cccb3a6617..ada4d18eb00c1 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class DetectionMAPOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/dgc_clip_by_norm_op.h b/paddle/fluid/operators/dgc_clip_by_norm_op.h index d3f55edd8840f..8e362957e46e8 100644 --- a/paddle/fluid/operators/dgc_clip_by_norm_op.h +++ b/paddle/fluid/operators/dgc_clip_by_norm_op.h @@ -21,8 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class DGCClipByNormKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/dropout_op_mlu.cc b/paddle/fluid/operators/dropout_op_mlu.cc index 7cf98738d073f..0d0686026da4b 100644 --- a/paddle/fluid/operators/dropout_op_mlu.cc +++ b/paddle/fluid/operators/dropout_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class DropoutMLUKernel : public framework::OpKernel { public: @@ -106,8 +104,8 @@ class DropoutMLUKernel : public framework::OpKernel { } // In downgrade_in_infer mode, need to multiply (1.0f - dropout_prob). - Tensor scale_tensor(x->dtype()); - Tensor bias_tensor(x->dtype()); + phi::DenseTensor scale_tensor(x->dtype()); + phi::DenseTensor bias_tensor(x->dtype()); scale_tensor.mutable_data({1}, ctx.GetPlace()); bias_tensor.mutable_data({1}, ctx.GetPlace()); MLUCnnlTensorDesc scale_desc(scale_tensor); @@ -157,7 +155,7 @@ class DropoutGradMLUKernel : public framework::OpKernel { } // cast mask from uint8 to float32/float16 - Tensor cast_mask(grad_x->dtype()); + phi::DenseTensor cast_mask(grad_x->dtype()); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/dropout_op_npu.cc b/paddle/fluid/operators/dropout_op_npu.cc index a63b6e5e479af..72453bedee399 100644 --- a/paddle/fluid/operators/dropout_op_npu.cc +++ b/paddle/fluid/operators/dropout_op_npu.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class DropoutNPUKernel : public framework::OpKernel { public: @@ -56,8 +54,8 @@ class DropoutNPUKernel : public framework::OpKernel { // only achieve the default `upscale_in_train` method if (!is_test) { - Tensor tmp_x(x->dtype()); - Tensor tmp_out(out->dtype()); + phi::DenseTensor tmp_x(x->dtype()); + phi::DenseTensor tmp_out(out->dtype()); tmp_x.ShareDataWith(*x); tmp_out.ShareDataWith(*out); if (x->dims().size() == 1) { @@ -80,7 +78,7 @@ class DropoutNPUKernel : public framework::OpKernel { seed = ctx.Attr("fix_seed") ? ctx.Attr("seed") : 0; } - Tensor keep_prob_tensor(x->dtype()); + phi::DenseTensor keep_prob_tensor(x->dtype()); keep_prob_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&keep_prob_tensor, static_cast(keep_prob)); @@ -89,14 +87,14 @@ class DropoutNPUKernel : public framework::OpKernel { // mask used in `DropOutGenMask` NPU OP is different from // the output `Mask`. - Tensor npu_mask(experimental::DataType::UINT8); + phi::DenseTensor npu_mask(experimental::DataType::UINT8); uint32_t length = (x->numel() + 128 - 1) / 128 * 128; npu_mask.Resize(phi::make_ddim({length / 8})); npu_mask.mutable_data(ctx.GetPlace()); // TODO(pangyoki): `keep_prob` used in `DropOutGenMask` NPU // OP must be a scalar with shape[0]. At present, the shape - // of the `prob` Tensor of this OP is forced to be set to 0 + // of the `prob` phi::DenseTensor of this OP is forced to be set to 0 // in `npu_op_runner.cc`, which needs to be optimized later. 
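The dropout kernels above follow the `upscale_in_train` convention, while the MLU code's `downgrade_in_infer` branch scales by (1 - dropout_prob) at inference. A scalar sketch of the two conventions, with the RNG, mask packing and NPU/MLU specifics omitted:

// upscale_in_train: scale kept activations by 1/keep_prob while training,
// so inference is the identity.
float DropoutUpscaleInTrain(float x, bool keep, float keep_prob, bool is_test) {
  if (is_test) return x;
  return keep ? x / keep_prob : 0.0f;
}

// downgrade_in_infer: keep activations unscaled while training and
// multiply by keep_prob (= 1 - dropout_prob) at inference.
float DropoutDowngradeInInfer(float x, bool keep, float keep_prob, bool is_test) {
  if (is_test) return x * keep_prob;
  return keep ? x : 0.0f;
}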
NpuOpRunner runner_gen_mask; runner_gen_mask.SetType("DropOutGenMask") @@ -116,7 +114,7 @@ class DropoutNPUKernel : public framework::OpKernel { runner_dropout.Run(stream); // cast `out` from float/float16 to bool - Tensor cast_mask(experimental::DataType::BOOL); + phi::DenseTensor cast_mask(experimental::DataType::BOOL); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); auto dst_dtype_bool = @@ -176,7 +174,7 @@ class DropoutGradNPUKernel : public framework::OpKernel { } // cast mask from uint8 to float32/float16 - Tensor cast_mask(dx->dtype()); + phi::DenseTensor cast_mask(dx->dtype()); cast_mask.Resize(mask->dims()); cast_mask.mutable_data(ctx.GetPlace()); auto dst_dtype = diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc index 456a11f95aaca..7c6cd94782a9c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc @@ -16,7 +16,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ElementwiseAddMLUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc index 17a1736c0871b..7b6683255ea93 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ElementwiseAddNPUKernel : public framework::OpKernel { @@ -53,7 +52,7 @@ class ElementwiseAddNPUKernel : public framework::OpKernel { const auto& runner = NpuOpRunner("Add", {*x, *y}, {*out}, {}); runner.Run(dev_ctx.stream()); } else { - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; NpuElementWiseOpBroadcast( dev_ctx, x, y, axis, &transformed_x, &transformed_y); const auto& runner = @@ -96,7 +95,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel { } } if (!reduce_axes.empty()) { - Tensor tmp; + phi::DenseTensor tmp; tmp.ShareDataWith(*dx); tmp.Resize(phi::make_ddim(dst_dims_vec)); const auto& runner = @@ -128,7 +127,7 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel { } } if (!reduce_axes.empty()) { - Tensor tmp; + phi::DenseTensor tmp; tmp.ShareDataWith(*dy); tmp.Resize(phi::make_ddim(dst_dims_vec)); const auto& runner = diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index 236b40c122204..8c7aa350b4372 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -24,7 +24,6 @@ namespace operators { class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext* ctx) const override { auto y_grad_name = framework::GradVarName("Y"); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc index 27f7281b9fb1e..d3e955cd2fe32 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc @@ -21,8 +21,6 @@ limitations under the 
License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseDivMLUKernel : public framework::OpKernel { public: @@ -66,7 +64,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel { CNNL_OP_TENSOR_MUL, ToCnnlDataType(), CNNL_NOT_PROPAGATE_NAN); // compute dout/y == 1/y * dout - Tensor dout_div_y(dout->dtype()); + phi::DenseTensor dout_div_y(dout->dtype()); dout_div_y.Resize(dout->dims()); dout_div_y.mutable_data(ctx.GetPlace()); MLUBinary
(ctx, @@ -110,7 +108,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel { if (dy) { // compute dy = -out * (dout/y) = -out/y * dout - Tensor neg_out(out->type()); + phi::DenseTensor neg_out(out->type()); neg_out.mutable_data(out->dims(), ctx.GetPlace()); MLUCnnlTensorDesc out_desc(*out); @@ -121,7 +119,7 @@ class ElementwiseDivGradMLUKernel : public framework::OpKernel { out_desc.get(), GetBasePtr(&neg_out)); - Tensor dy_temp(y->dtype()); + phi::DenseTensor dy_temp(y->dtype()); dy_temp.Resize(dout->dims()); dy_temp.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc index 74a2a5b6ca6eb..6cc37517d4fbe 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseDivNPUKernel : public framework::OpKernel { public: @@ -66,38 +64,38 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { if (dx) { dx->mutable_data(place); - Tensor tensor_one(y->type()); + phi::DenseTensor tensor_one(y->type()); tensor_one.mutable_data({1}, place); FillNpuTensorWithConstant(&tensor_one, static_cast(1.0)); // Use `Div` CANN OP to achieve `1/y` instead of `Power` CANN OP. // Because `Power` will cause precision overflow, that is, `float_status` // will be set to 1. - Tensor y_div(y->type()); + phi::DenseTensor y_div(y->type()); y_div.mutable_data(y->dims(), place); const auto& runner_one_div_y = NpuOpRunner("Div", {tensor_one, *y}, {y_div}, {}); runner_one_div_y.Run(stream); - Tensor tensor_zeros(x->type()); + phi::DenseTensor tensor_zeros(x->type()); tensor_zeros.mutable_data(x->dims(), place); const auto& runner_tensor_zeros = NpuOpRunner("ZerosLike", {*x}, {tensor_zeros}, {}); runner_tensor_zeros.Run(stream); - Tensor x_zero(experimental::DataType::BOOL); + phi::DenseTensor x_zero(experimental::DataType::BOOL); x_zero.mutable_data(x->dims(), place); const auto& runner_x_zero = NpuOpRunner("Equal", {*x, tensor_zeros}, {x_zero}, {}); runner_x_zero.Run(stream); - Tensor x_nozero(experimental::DataType::BOOL); + phi::DenseTensor x_nozero(experimental::DataType::BOOL); x_nozero.mutable_data(x->dims(), place); const auto& runner_x_nonzero = NpuOpRunner("LogicalNot", {x_zero}, {x_nozero}, {}); runner_x_nonzero.Run(stream); - Tensor x_nozero_f(x->type()); + phi::DenseTensor x_nozero_f(x->type()); x_nozero_f.mutable_data(x->dims(), place); const auto& runner_x_nonzero_f = NpuOpRunner("Cast", @@ -106,7 +104,7 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { {{"dst_type", static_cast(0)}}); runner_x_nonzero_f.Run(stream); - Tensor x_grad_w(x->type()); + phi::DenseTensor x_grad_w(x->type()); x_grad_w.mutable_data(x->dims(), place); const auto& runner_x_grad_w = NpuOpRunner("Mul", {x_nozero_f, y_div}, {x_grad_w}, {}); @@ -120,19 +118,19 @@ class ElementwiseDivGradNPUKernel : public framework::OpKernel { if (dy) { dy->mutable_data(place); - Tensor neg_out(out->type()); + phi::DenseTensor neg_out(out->type()); neg_out.mutable_data(out->dims(), place); const auto& runner_neg_out = NpuOpRunner("Neg", {*out}, {neg_out}, {}); runner_neg_out.Run(stream); - Tensor tmp_mul(out->type()); + phi::DenseTensor tmp_mul(out->type()); tmp_mul.mutable_data(out->dims(), place); const auto& runner_mul = NpuOpRunner("Mul", {neg_out, *dout}, {tmp_mul}, 
{}); runner_mul.Run(stream); if (dy->dims() != dout->dims()) { - Tensor reduced_tmp_mul(y->type()); + phi::DenseTensor reduced_tmp_mul(y->type()); reduced_tmp_mul.mutable_data(y->dims(), place); std::vector axes; diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc index 396f1b6f6223a..5f1b84112b2f9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseFloorDivNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc index fe91c28cd1f05..14bfbfb693b06 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseMaxNPUKernel : public framework::OpKernel { public: @@ -51,7 +49,7 @@ class ElementwiseMaxNPUKernel : public framework::OpKernel { const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*out}, {}); runner.Run(stream); } else { - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; NpuElementWiseOpBroadcast( dev_ctx, x, y, axis, &transformed_x, &transformed_y); const auto& runner = @@ -85,7 +83,7 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel { auto x_dims = x->dims(); auto y_dims = y->dims(); axis = (axis == -1 ? 
std::abs(x_dims.size() - y_dims.size()) : axis); - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; NpuElementWiseOpBroadcast( dev_ctx, x, y, axis, &transformed_x, &transformed_y); @@ -99,9 +97,9 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel { if (dx && dy) { dx->mutable_data(ctx.GetPlace()); dy->mutable_data(ctx.GetPlace()); - Tensor tmp_dx; + phi::DenseTensor tmp_dx; tmp_dx.mutable_data(dout_dims, ctx.GetPlace()); - Tensor tmp_dy; + phi::DenseTensor tmp_dy; tmp_dy.mutable_data(dout_dims, ctx.GetPlace()); const auto& runner = NpuOpRunner("MaximumGrad", @@ -153,12 +151,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel { } } else if (dx) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(dout_dims, ctx.GetPlace()); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); dx->mutable_data(ctx.GetPlace()); - Tensor tmp_dx; + phi::DenseTensor tmp_dx; tmp_dx.mutable_data(dout_dims, ctx.GetPlace()); const auto& runner = NpuOpRunner("MaximumGrad", @@ -190,12 +188,12 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel { } } else if (dy) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(dout_dims, ctx.GetPlace()); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); dy->mutable_data(ctx.GetPlace()); - Tensor tmp_dy; + phi::DenseTensor tmp_dy; tmp_dy.mutable_data(dout_dims, ctx.GetPlace()); const auto& runner = NpuOpRunner("MaximumGrad", diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc index 861ed2046c077..43b25b5127c8b 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseMinMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc index 8014f82ca5742..86c37e0c89020 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. 
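Per element, the Maximum/Minimum grad kernels above route dout to whichever input produced the output; the extra ReduceSumD steps only handle the case where an input was broadcast. A scalar sketch of the per-element rule (tie-breaking toward x is an assumption of this sketch):

struct PairGrad { float dx, dy; };

PairGrad MaxGrad(float x, float y, float dout) {
  return (x >= y) ? PairGrad{dout, 0.0f} : PairGrad{0.0f, dout};
}

PairGrad MinGrad(float x, float y, float dout) {
  return (x <= y) ? PairGrad{dout, 0.0f} : PairGrad{0.0f, dout};
}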
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseMinNPUKernel : public framework::OpKernel { public: @@ -48,7 +46,7 @@ class ElementwiseMinNPUKernel : public framework::OpKernel { } else { direct_compute = x_dims == phi::slice_ddim(y_dims, axis, y_dims.size()); } - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; if (direct_compute) { transformed_x.ShareDataWith(*x); transformed_y.ShareDataWith(*y); @@ -82,7 +80,7 @@ class ElementwiseMinGradNPUKernel : public framework::OpKernel { if (dx && dy) { // dx dx->mutable_data(ctx.GetPlace()); - Tensor tmp_x; + phi::DenseTensor tmp_x; tmp_x.ShareDataWith(*dx); if (dx->dims() != dout->dims()) { std::vector dst_dims_vec_x; @@ -105,7 +103,7 @@ class ElementwiseMinGradNPUKernel : public framework::OpKernel { } // dy dy->mutable_data(ctx.GetPlace()); - Tensor tmp_y; + phi::DenseTensor tmp_y; tmp_y.ShareDataWith(*dy); if (dy->dims() != dout->dims()) { std::vector dst_dims_vec_y; @@ -134,12 +132,12 @@ class ElementwiseMinGradNPUKernel : public framework::OpKernel { runner.Run(stream); } else if (dx) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(y->dims(), ctx.GetPlace()); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); // dx dx->mutable_data(ctx.GetPlace()); - Tensor tmp_x; + phi::DenseTensor tmp_x; tmp_x.ShareDataWith(*dx); if (dx->dims() != dout->dims()) { std::vector dst_dims_vec_x; @@ -168,13 +166,13 @@ class ElementwiseMinGradNPUKernel : public framework::OpKernel { runner.Run(stream); } else if (dy) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(x->dims(), ctx.GetPlace()); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); // dy dy->mutable_data(ctx.GetPlace()); - Tensor tmp_y; + phi::DenseTensor tmp_y; tmp_y.ShareDataWith(*dy); if (dy->dims() != dout->dims()) { std::vector dst_dims_vec_y; diff --git a/paddle/fluid/operators/elementwise/elementwise_mlu.h b/paddle/fluid/operators/elementwise/elementwise_mlu.h index 57f4b0c057686..9a33d5a26ad54 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mlu.h +++ b/paddle/fluid/operators/elementwise/elementwise_mlu.h @@ -309,7 +309,7 @@ void MLUMinMaxGradHelper(const framework::ExecutionContext& ctx) { // mask = Logic(x, y) only support min & max cnnlLogicOp_t logic = Functor == MAXIMUM_GRAD ? 
CNNL_LOGIC_OP_GE : CNNL_LOGIC_OP_LE; - Tensor mask(x->dtype()); + phi::DenseTensor mask(x->dtype()); mask.Resize(phi::make_ddim(out_dims_array)); mask.mutable_data(ctx.GetPlace()); @@ -327,7 +327,7 @@ void MLUMinMaxGradHelper(const framework::ExecutionContext& ctx) { GetBasePtr(&mask)); // dx = Mul(dz, mask) - Tensor dx_temp(x->dtype()); + phi::DenseTensor dx_temp(x->dtype()); dx_temp.Resize(dout->dims()); dx_temp.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc dout_desc(*dout); @@ -344,7 +344,7 @@ void MLUMinMaxGradHelper(const framework::ExecutionContext& ctx) { data_type); // dy = Sub(dz, dx) - Tensor dy_temp(y->dtype()); + phi::DenseTensor dy_temp(y->dtype()); dy_temp.Resize(dout->dims()); dy_temp.mutable_data(ctx.GetPlace()); MLUCnnlOpTensorDesc sub_op_desc( diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc index bdeef48389b6c..f73fbba0fb496 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseModNPUKernel : public framework::OpKernel { public: @@ -43,7 +41,7 @@ class ElementwiseModNPUKernel : public framework::OpKernel { direct_compute = x_dims == phi::slice_ddim(y_dims, axis, y_dims.size()); } - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; if (direct_compute) { transformed_x.ShareDataWith(*x); transformed_y.ShareDataWith(*y); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index c7b872af75a44..5aa1b7ed4f1dd 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -25,7 +25,6 @@ namespace operators { class ElementwiseMulOp : public ElementwiseOp { public: - using Tensor = phi::DenseTensor; using ElementwiseOp::ElementwiseOp; framework::OpKernelType GetExpectedKernelType( diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc index fe2848621c76f..c5f8a0ad711a6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using MLUDeviceContext = platform::MLUDeviceContext; template @@ -73,7 +72,7 @@ class ElementwiseMulGradMLUKernel : public framework::OpKernel { GetBasePtr(dx), ToCnnlDataType()); } else { - Tensor dx_temp(x->dtype()); + phi::DenseTensor dx_temp(x->dtype()); dx_temp.Resize(dout->dims()); dx_temp.mutable_data(ctx.GetPlace()); MLUCnnl::OpTensor(ctx, @@ -121,7 +120,7 @@ class ElementwiseMulGradMLUKernel : public framework::OpKernel { GetBasePtr(dy), ToCnnlDataType()); } else { - Tensor dy_temp(y->dtype()); + phi::DenseTensor dy_temp(y->dtype()); dy_temp.Resize(dout->dims()); dy_temp.mutable_data(ctx.GetPlace()); MLUCnnl::OpTensor(ctx, diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc index 4fc3be1b29cc7..d9bf2adeee72c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -78,7 +77,7 @@ class ElementwiseMulNPUKernel : public framework::OpKernel { const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*out}, {}); runner.Run(stream); } else { - Tensor trans_x, trans_y; + phi::DenseTensor trans_x, trans_y; NpuElementWiseOpBroadcast(dev_ctx, x, y, axis, &trans_x, &trans_y); const auto& runner = NpuOpRunner("Mul", {trans_x, trans_y}, {*out}, {}); runner.Run(stream); @@ -101,7 +100,7 @@ class ElementwiseMulGradNPUKernel : public framework::OpKernel { axis = (axis == -1 ? std::abs(x->dims().size() - y->dims().size()) : axis); auto stream = ctx.template device_context().stream(); - Tensor trans_x, trans_y; + phi::DenseTensor trans_x, trans_y; NpuElementWiseOpBroadcast(dev_ctx, x, y, axis, &trans_x, &trans_y); if (dx) { @@ -110,7 +109,7 @@ class ElementwiseMulGradNPUKernel : public framework::OpKernel { const auto& runner_dx = NpuOpRunner("Mul", {*dout, trans_y}, {*dx}, {}); runner_dx.Run(stream); } else { - Tensor dx_temp(x->type()); + phi::DenseTensor dx_temp(x->type()); dx_temp.Resize(trans_x.dims()); dx_temp.mutable_data(ctx.GetPlace()); const auto& runner_dx = @@ -126,7 +125,7 @@ class ElementwiseMulGradNPUKernel : public framework::OpKernel { const auto& runner_dy = NpuOpRunner("Mul", {trans_x, *dout}, {*dy}, {}); runner_dy.Run(stream); } else { - Tensor dy_temp(y->type()); + phi::DenseTensor dy_temp(y->type()); dy_temp.Resize(trans_y.dims()); dy_temp.mutable_data(ctx.GetPlace()); const auto& runner_dy = diff --git a/paddle/fluid/operators/elementwise/elementwise_npu.h b/paddle/fluid/operators/elementwise/elementwise_npu.h index b7e85c45f4c7c..d8ee104c66b99 100644 --- a/paddle/fluid/operators/elementwise/elementwise_npu.h +++ b/paddle/fluid/operators/elementwise/elementwise_npu.h @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, @@ -32,12 +31,12 @@ void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, // 1. 
expand the axis with dim 1 auto src_dims = src->dims(); - Tensor tmp_src; + phi::DenseTensor tmp_src; tmp_src.ShareDataWith(*src); tmp_src.Resize(src_dims); for (int i = 0; i < src_dims.size(); ++i) { if (src_dims[i] == 1 && dst_dims[i + axis] > 1) { - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; auto tmp_tensor_dims = tmp_src.dims(); tmp_tensor_dims[i] = dst_dims[i + axis]; tmp_tensor.mutable_data(tmp_tensor_dims, dev_ctx.GetPlace()); @@ -56,7 +55,7 @@ void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, // 2.expand the ahead axis auto prev = phi::product(phi::slice_ddim(dst_dims, 0, axis)); if (prev > 1) { - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; auto tmp_tensor_dims = phi::slice_ddim(dst_dims, 0, axis + src_dims.size()); tmp_tensor.mutable_data(tmp_tensor_dims, dev_ctx.GetPlace()); const auto& runner = @@ -79,7 +78,7 @@ void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, src_dims_vec.push_back(1); tmp_src.Resize(phi::make_ddim(src_dims_vec)); - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.mutable_data(dst_dims, dev_ctx.GetPlace()); const auto& runner = NpuOpRunner("TileWithAxis", diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index 6bc9c345fcd4e..1ed8f4eb012a2 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -36,8 +36,6 @@ class ElementwiseOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; - void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ElementwiseOp"); OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "ElementwiseOp"); @@ -282,7 +280,6 @@ For example: class ElementwiseOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { auto out_grad_name = framework::GradVarName("Out"); @@ -330,7 +327,6 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { auto x_grad_name = framework::GradVarName("X"); @@ -376,7 +372,6 @@ class ElementwiseOpDoubleGradWithoutDXDY : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { if (ctx->HasOutput("DDOut")) { @@ -427,7 +422,6 @@ class ElementwiseOpDoubleGradWithoutDXDY class ElementwiseOpTripleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { if (ctx->HasOutput("D_DDX")) { diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc index 6942377049b47..77d1160e4ce16 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. 
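The broadcast helpers above first align the lower-rank operand y to x starting at `axis`, then tile the size-1 or missing dimensions. A shapes-only sketch of that alignment, assuming x's rank is at least y's rank (no data movement is shown):

#include <cstdint>
#include <stdexcept>
#include <vector>

std::vector<int64_t> AlignedShape(const std::vector<int64_t>& x_dims,
                                  const std::vector<int64_t>& y_dims,
                                  int axis) {
  if (axis < 0) axis = static_cast<int>(x_dims.size() - y_dims.size());
  std::vector<int64_t> out(x_dims.size(), 1);
  for (size_t i = 0; i < y_dims.size(); ++i) {
    int64_t xd = x_dims[axis + i];
    int64_t yd = y_dims[i];
    if (yd != xd && yd != 1 && xd != 1) {
      throw std::invalid_argument("incompatible broadcast dims");
    }
    out[axis + i] = yd;  // 1 here means "tile along this axis later"
  }
  return out;  // e.g. x = [2, 3, 4], y = [3, 1], axis = 1  ->  [1, 3, 1]
}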
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwisePowMLUKernel : public framework::OpKernel { public: @@ -64,11 +62,11 @@ class ElementwisePowGradMLUKernel : public framework::OpKernel { auto dout_dims = dout->dims(); if (dx) { // dx = dout * y * pow(x, y - 1); - Tensor one_dx(y->type()); + phi::DenseTensor one_dx(y->type()); one_dx.mutable_data(phi::make_ddim(y_dims_array), place); FillMLUTensorWithHostValue(ctx, static_cast(1), &one_dx); - Tensor sub_dx(y->type()); + phi::DenseTensor sub_dx(y->type()); sub_dx.mutable_data(phi::make_ddim(y_dims_array), place); MLUCnnlOpTensorDesc op_tensor_desc( CNNL_OP_TENSOR_SUB, data_type, CNNL_NOT_PROPAGATE_NAN); @@ -82,7 +80,7 @@ class ElementwisePowGradMLUKernel : public framework::OpKernel { GetBasePtr(&sub_dx), data_type); - Tensor tmp_dx(x->type()); + phi::DenseTensor tmp_dx(x->type()); tmp_dx.mutable_data(phi::make_ddim(out_dims_array), place); MLUCnnl::Pow(ctx, CNNL_COMPUTATION_HIGH_PRECISION, @@ -134,7 +132,7 @@ class ElementwisePowGradMLUKernel : public framework::OpKernel { } if (dy) { // dy = dout * log(x) * pow(x, y) - Tensor tmp_dy(y->type()); + phi::DenseTensor tmp_dy(y->type()); tmp_dy.mutable_data(phi::make_ddim(out_dims_array), place); MLUCnnl::Pow(ctx, CNNL_COMPUTATION_HIGH_PRECISION, @@ -145,7 +143,7 @@ class ElementwisePowGradMLUKernel : public framework::OpKernel { out_desc.get(), GetBasePtr(&tmp_dy)); - Tensor log_x(x->type()); + phi::DenseTensor log_x(x->type()); log_x.mutable_data(x->dims(), place); MLUCnnl::Log(ctx, CNNL_COMPUTATION_HIGH_PRECISION, diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc index 18853222ba6b7..b0b1b37c4f78d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwisePowNPUKernel : public framework::OpKernel { public: @@ -56,7 +54,7 @@ class ElementwisePowNPUKernel : public framework::OpKernel { const auto& runner = NpuOpRunner("Pow", {*x, *y}, {*out}, {}); runner.Run(stream); } else { - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; NpuElementWiseOpBroadcast( dev_ctx, x, y, axis, &transformed_x, &transformed_y); const auto& runner = @@ -84,7 +82,7 @@ class ElementwisePowGradNPUKernel : public framework::OpKernel { auto y_dims = y->dims(); axis = (axis < 0 ? std::abs(x_dims.size() - y_dims.size()) + axis + 1 : axis); - Tensor transformed_x, transformed_y; + phi::DenseTensor transformed_x, transformed_y; NpuElementWiseOpBroadcast( dev_ctx, x, y, axis, &transformed_x, &transformed_y); @@ -93,34 +91,34 @@ class ElementwisePowGradNPUKernel : public framework::OpKernel { // Reshape info vector. 
std::vector reduce_axes; if (dx) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(dout_dims, place); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); dx->mutable_data(place); - Tensor tmp_dx; + phi::DenseTensor tmp_dx; tmp_dx.mutable_data(dout_dims, place); // dx = dout * y * pow(x, y - 1); - Tensor PowGrad_dx_temp1(dout->type()); + phi::DenseTensor PowGrad_dx_temp1(dout->type()); PowGrad_dx_temp1.mutable_data(dout->dims(), place); const auto& runner_PowGrad_dx_temp1 = NpuOpRunner("Mul", {*dout, transformed_y}, {PowGrad_dx_temp1}, {}); runner_PowGrad_dx_temp1.Run(stream); - Tensor one_dx(transformed_y.type()); + phi::DenseTensor one_dx(transformed_y.type()); one_dx.mutable_data(transformed_y.dims(), place); const auto& runner_one_dx = NpuOpRunner("OnesLike", {transformed_y}, {one_dx}, {}); runner_one_dx.Run(stream); - Tensor sub_dx(transformed_y.type()); + phi::DenseTensor sub_dx(transformed_y.type()); sub_dx.mutable_data(transformed_y.dims(), place); const auto& runner_sub_dx = NpuOpRunner("Sub", {transformed_y, one_dx}, {sub_dx}, {}); runner_sub_dx.Run(stream); - Tensor PowGrad_dx_temp2(transformed_x.type()); + phi::DenseTensor PowGrad_dx_temp2(transformed_x.type()); PowGrad_dx_temp2.mutable_data(transformed_x.dims(), place); const auto& runner_PowGrad_dx_temp2 = NpuOpRunner("Pow", {transformed_x, sub_dx}, {PowGrad_dx_temp2}, {}); @@ -153,39 +151,39 @@ class ElementwisePowGradNPUKernel : public framework::OpKernel { } } if (dy) { - Tensor zero_tensor(dout->type()); + phi::DenseTensor zero_tensor(dout->type()); zero_tensor.mutable_data(dout_dims, place); FillNpuTensorWithConstant(&zero_tensor, static_cast(0)); dy->mutable_data(place); - Tensor tmp_dy; + phi::DenseTensor tmp_dy; tmp_dy.mutable_data(dout_dims, place); // dy = dout * log(x) * pow(x, y) - Tensor PowGrad_dy_temp1(transformed_x.type()); + phi::DenseTensor PowGrad_dy_temp1(transformed_x.type()); PowGrad_dy_temp1.mutable_data(transformed_x.dims(), place); const auto& runner_PowGrad_dy_temp1 = NpuOpRunner( "Pow", {transformed_x, transformed_y}, {PowGrad_dy_temp1}, {}); runner_PowGrad_dy_temp1.Run(stream); - Tensor one_dy(transformed_x.type()); + phi::DenseTensor one_dy(transformed_x.type()); one_dy.mutable_data(transformed_x.dims(), place); const auto& runner_one_dy = NpuOpRunner("OnesLike", {transformed_x}, {one_dy}, {}); runner_one_dy.Run(stream); - Tensor sub_dy(transformed_x.type()); + phi::DenseTensor sub_dy(transformed_x.type()); sub_dy.mutable_data(transformed_x.dims(), place); const auto& runner_sub_dy = NpuOpRunner("Sub", {transformed_x, one_dy}, {sub_dy}, {}); runner_sub_dy.Run(stream); - Tensor log_dy(transformed_x.type()); + phi::DenseTensor log_dy(transformed_x.type()); log_dy.mutable_data(transformed_x.dims(), place); const auto& runner_log_dy = NpuOpRunner("Log1p", {sub_dy}, {log_dy}, {}); runner_log_dy.Run(stream); - Tensor PowGrad_dy_temp2(transformed_x.type()); + phi::DenseTensor PowGrad_dy_temp2(transformed_x.type()); PowGrad_dy_temp2.mutable_data(transformed_x.dims(), place); const auto& runner_PowGrad_dy_temp2 = NpuOpRunner( "Mul", {log_dy, PowGrad_dy_temp1}, {PowGrad_dy_temp2}, {}); diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc index 0f56044d268e4..1233ae2d0ae0c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseSubMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc index 8df295a972559..9f70961c9f620 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ElementwiseSubNPUKernel : public framework::OpKernel { public: @@ -76,7 +74,7 @@ class ElementwiseSubGradNPUKernel : public framework::OpKernel { axes.push_back(i); } phi::DenseTensor* tmp_dout = const_cast(dout); - Tensor reduced_dout(dx->type()); + phi::DenseTensor reduced_dout(dx->type()); if (axes.size() != 0) { std::vector reduced_dout_dims; for (auto i = reduce_ndim; i < dout->dims().size(); ++i) { @@ -124,8 +122,8 @@ class ElementwiseSubGradNPUKernel : public framework::OpKernel { axes.push_back(i); } phi::DenseTensor* tmp_dout = const_cast(dout); - Tensor reduced_dy(dy->type()); - Tensor reduced_dout(dy->type()); + phi::DenseTensor reduced_dy(dy->type()); + phi::DenseTensor reduced_dout(dy->type()); if (axes.size() != 0) { std::vector reduced_dout_dims; diff --git a/paddle/fluid/operators/expand_as_op.h b/paddle/fluid/operators/expand_as_op.h index 58b6b619c231a..a3462a00bcfb1 100644 --- a/paddle/fluid/operators/expand_as_op.h +++ b/paddle/fluid/operators/expand_as_op.h @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/expand_as_v2_op.h b/paddle/fluid/operators/expand_as_v2_op.h index 1205fc0447f1e..2c62dc570ff21 100644 --- a/paddle/fluid/operators/expand_as_v2_op.h +++ b/paddle/fluid/operators/expand_as_v2_op.h @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/expand_as_v2_op_mlu.cc b/paddle/fluid/operators/expand_as_v2_op_mlu.cc index 8184af44916bb..71b154ff02274 100644 --- a/paddle/fluid/operators/expand_as_v2_op_mlu.cc +++ b/paddle/fluid/operators/expand_as_v2_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ExpandAsV2MLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/expand_op.h b/paddle/fluid/operators/expand_op.h index 35d16311a97b3..6d6739eed6702 100644 --- a/paddle/fluid/operators/expand_op.h +++ b/paddle/fluid/operators/expand_op.h @@ -85,7 +85,6 @@ inline std::vector get_expand_times( } } -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index d5748328b1d4d..95a4147c88dbd 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ExpandV2NPUKernel : public framework::OpKernel { public: @@ -121,8 +120,8 @@ class ExpandV2NPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - auto op_func = [](const std::vector& inputs, - const std::vector& outputs, + auto op_func = [](const std::vector& inputs, + const std::vector& outputs, const NPUAttributeMap& attrs, const platform::NPUDeviceContext& dev_ctx) { const auto& runner = NpuOpRunner("ExpandD", inputs, outputs, attrs); @@ -174,8 +173,8 @@ class ExpandV2NPUGradKernel : public framework::OpKernel { axes.push_back(i); } - Tensor tmp_dout(dout->dtype()); - Tensor reduced_dout(dx->dtype()); + phi::DenseTensor tmp_dout(dout->dtype()); + phi::DenseTensor reduced_dout(dx->dtype()); tmp_dout.ShareDataWith(*dout); if (axes.size() != 0) { std::vector reduced_dout_dims; diff --git a/paddle/fluid/operators/eye_op_npu.cc b/paddle/fluid/operators/eye_op_npu.cc index 6a01992c83335..74bbc531c27e3 100644 --- a/paddle/fluid/operators/eye_op_npu.cc +++ b/paddle/fluid/operators/eye_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class EyeNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/fc_op.h b/paddle/fluid/operators/fc_op.h index 433288e885d01..025c73db8c375 100644 --- a/paddle/fluid/operators/fc_op.h +++ b/paddle/fluid/operators/fc_op.h @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; inline void FCOutputSize(const framework::DDim& in_dims, const framework::DDim& w_dims, diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 22df3e5a9d23a..a3ea1af82ee4d 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { public: @@ -80,7 +78,7 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { } else { out->mutable_data(ctx.GetPlace(), framework::TransToPhiDataType(data_type)); - Tensor tensor_tmp(framework::TransToPhiDataType(data_type)); + phi::DenseTensor tensor_tmp(framework::TransToPhiDataType(data_type)); tensor_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_tmp, value); diff --git a/paddle/fluid/operators/fill_constant_op_mlu.cc b/paddle/fluid/operators/fill_constant_op_mlu.cc index 664d70609e939..8263534f4eeeb 100644 --- a/paddle/fluid/operators/fill_constant_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_op_mlu.cc @@ -60,7 +60,8 @@ class FillConstantMLUKernel : public framework::OpKernel { value_tensor->numel(), 1, platform::errors::InvalidArgument( - "When use Tensor as value to set Tensor value in fill_cosntant, " + "When use phi::DenseTensor as value to set phi::DenseTensor " + "value in fill_cosntant, " "value input(ValueTensor) size must be 1, but get %d", value_tensor->numel())); value_data = value_tensor->data(); diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu index 56068684e16ce..c07a69177b832 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ b/paddle/fluid/operators/filter_by_instag_op.cu @@ -43,7 +43,6 @@ namespace cg = cooperative_groups; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index 04f1099168a5c..95e6611d9351f 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -29,7 +29,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index 65d3f809fa11c..81af8e64f2767 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -28,8 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FlattenOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/flatten_op_npu.cc b/paddle/fluid/operators/flatten_op_npu.cc index 177825020d0dc..6c8f986c5e5df 100644 --- a/paddle/fluid/operators/flatten_op_npu.cc +++ b/paddle/fluid/operators/flatten_op_npu.cc @@ -55,8 +55,6 @@ class Flatten2GradNPUKernel : public framework::OpKernel { } }; -using Tensor = phi::DenseTensor; - template class FlattenContiguousRangeNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/fsp_op.h b/paddle/fluid/operators/fsp_op.h index 0f8072520be2f..c5b903559a07b 100644 --- a/paddle/fluid/operators/fsp_op.h +++ b/paddle/fluid/operators/fsp_op.h @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FSPOpKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/fused/attn_gemm.h b/paddle/fluid/operators/fused/attn_gemm.h index c8ea19d463a1b..c0157c8cb04dd 100644 --- a/paddle/fluid/operators/fused/attn_gemm.h +++ b/paddle/fluid/operators/fused/attn_gemm.h @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; // support gemm-nt and gemm-nn, which is used in fused_attention_op. template class AttnMatMul { diff --git a/paddle/fluid/operators/fused/attn_gemm_int8.h b/paddle/fluid/operators/fused/attn_gemm_int8.h index cdbd5b2e0b821..e26273b745260 100644 --- a/paddle/fluid/operators/fused/attn_gemm_int8.h +++ b/paddle/fluid/operators/fused/attn_gemm_int8.h @@ -27,7 +27,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using phi::backends::gpu::GpuLaunchConfig; template diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index 87ed8fb68fe2a..dee0c1837a452 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -27,7 +27,6 @@ namespace paddle { namespace operators { #if PADDLE_WITH_HIP || CUDNN_VERSION >= 7100 -using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; @@ -77,8 +76,8 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { const std::string padding_algorithm = ctx.Attr("padding_algorithm"); - Tensor transformed_input_channel(input->dtype()); - Tensor transformed_output(output->dtype()); + phi::DenseTensor transformed_input_channel(input->dtype()); + phi::DenseTensor transformed_output(output->dtype()); transformed_input_channel = *input; transformed_output = *output; T* output_data = transformed_output.data(); @@ -99,7 +98,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { int data_dim = strides.size(); // 2d or 3d bool is_sys_pad = phi::funcs::IsSymmetricPadding(paddings, data_dim); - Tensor transformed_input; + phi::DenseTensor transformed_input; std::vector padding_common(data_dim, 0); if (!is_sys_pad) { std::vector padding_diff(data_dim); @@ -144,7 +143,8 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { } break; default: PADDLE_THROW(platform::errors::PermissionDenied( - "Operator Conv2DFusion expects Input to be a 4-D or 5-D Tensor. " + "Operator Conv2DFusion expects Input to be a 4-D or 5-D " + "phi::DenseTensor. 
" "But received the actual dimension = %d, shape = [%s].", rank, transformed_input_channel.dims())); diff --git a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc index 7c9af00955963..cbf098819212f 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc +++ b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc @@ -31,7 +31,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace framework = paddle::framework; namespace platform = paddle::platform; namespace op = paddle::operators; -using Tensor = phi::DenseTensor; USE_OP_ITSELF(batch_norm); PD_DECLARE_KERNEL(batch_norm, GPU, ALL_LAYOUT); @@ -149,15 +148,15 @@ void ComputeInplaceRelu(phi::DenseTensor *cpu_x) { } void ComputeBatchNormForward(const phi::GPUContext &ctx, - const Tensor &cpu_x, - const Tensor &cpu_scale, - const Tensor &cpu_bias, - Tensor *cpu_mean, - Tensor *cpu_var, - Tensor *cpu_saved_mean, - Tensor *cpu_saved_var, - Tensor *cpu_y, - Tensor *saved_reserve_space) { + const phi::DenseTensor &cpu_x, + const phi::DenseTensor &cpu_scale, + const phi::DenseTensor &cpu_bias, + phi::DenseTensor *cpu_mean, + phi::DenseTensor *cpu_var, + phi::DenseTensor *cpu_saved_mean, + phi::DenseTensor *cpu_saved_var, + phi::DenseTensor *cpu_y, + phi::DenseTensor *saved_reserve_space) { framework::Scope scope; auto *x = scope.Var("X")->GetMutable(); auto *scale = scope.Var("Scale")->GetMutable(); @@ -215,16 +214,16 @@ void ComputeBatchNormForward(const phi::GPUContext &ctx, } void ComputeFusedBNAddReluForward(const phi::GPUContext &ctx, - const Tensor &cpu_x, - const Tensor &cpu_z, - const Tensor &cpu_scale, - const Tensor &cpu_bias, - Tensor *cpu_mean, - Tensor *cpu_var, - Tensor *cpu_saved_mean, - Tensor *cpu_saved_var, - Tensor *cpu_y, - Tensor *saved_reserve_space) { + const phi::DenseTensor &cpu_x, + const phi::DenseTensor &cpu_z, + const phi::DenseTensor &cpu_scale, + const phi::DenseTensor &cpu_bias, + phi::DenseTensor *cpu_mean, + phi::DenseTensor *cpu_var, + phi::DenseTensor *cpu_saved_mean, + phi::DenseTensor *cpu_saved_var, + phi::DenseTensor *cpu_y, + phi::DenseTensor *saved_reserve_space) { framework::Scope scope; auto *x = scope.Var("X")->GetMutable(); auto *z = scope.Var("Z")->GetMutable(); @@ -278,18 +277,18 @@ void ComputeFusedBNAddReluForward(const phi::GPUContext &ctx, } void ComputeFusedBNAddReluBackward(const phi::GPUContext &ctx, - const Tensor &cpu_dy, - const Tensor &cpu_x, - const Tensor &cpu_scale, - const Tensor &cpu_bias, - const Tensor &cpu_saved_mean, - const Tensor &cpu_saved_var, - const Tensor &cpu_y, - const Tensor &saved_reserve_space, - Tensor *cpu_dx, - Tensor *cpu_dz, - Tensor *cpu_dscale, - Tensor *cpu_dbias) { + const phi::DenseTensor &cpu_dy, + const phi::DenseTensor &cpu_x, + const phi::DenseTensor &cpu_scale, + const phi::DenseTensor &cpu_bias, + const phi::DenseTensor &cpu_saved_mean, + const phi::DenseTensor &cpu_saved_var, + const phi::DenseTensor &cpu_y, + const phi::DenseTensor &saved_reserve_space, + phi::DenseTensor *cpu_dx, + phi::DenseTensor *cpu_dz, + phi::DenseTensor *cpu_dscale, + phi::DenseTensor *cpu_dbias) { framework::Scope scope; auto *x = scope.Var("X")->GetMutable(); auto *y = scope.Var("Y")->GetMutable(); @@ -383,7 +382,9 @@ class CudnnBNAddReluTester { phi::GPUContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); - auto select = [&](Tensor *in) { return has_shortcut_ ? in : nullptr; }; + auto select = [&](phi::DenseTensor *in) { + return has_shortcut_ ? 
in : nullptr; + }; phi::DenseTensor cpu_mean_base_x; phi::DenseTensor cpu_var_base_x; @@ -506,10 +507,10 @@ class CudnnBNAddReluTester { InitRandomTensor({batch_size_, height_, width_, channels_}, &cpu_dy_); } - void InitMeanVar(Tensor *cpu_mean, - Tensor *cpu_var, - Tensor *cpu_saved_mean, - Tensor *cpu_saved_var) { + void InitMeanVar(phi::DenseTensor *cpu_mean, + phi::DenseTensor *cpu_var, + phi::DenseTensor *cpu_saved_mean, + phi::DenseTensor *cpu_saved_var) { InitConstantTensor({channels_}, static_cast(0.0f), cpu_mean); InitConstantTensor({channels_}, static_cast(1.0f), cpu_var); InitConstantTensor( @@ -519,17 +520,17 @@ class CudnnBNAddReluTester { } void BaselineForward(const phi::GPUContext &ctx, - Tensor *cpu_mean_x, - Tensor *cpu_var_x, - Tensor *cpu_saved_mean_x, - Tensor *cpu_saved_var_x, - Tensor *cpu_y, - Tensor *saved_reserve_space_x, - Tensor *cpu_mean_z = nullptr, - Tensor *cpu_var_z = nullptr, - Tensor *cpu_saved_mean_z = nullptr, - Tensor *cpu_saved_var_z = nullptr, - Tensor *saved_reserve_space_z = nullptr) { + phi::DenseTensor *cpu_mean_x, + phi::DenseTensor *cpu_var_x, + phi::DenseTensor *cpu_saved_mean_x, + phi::DenseTensor *cpu_saved_var_x, + phi::DenseTensor *cpu_y, + phi::DenseTensor *saved_reserve_space_x, + phi::DenseTensor *cpu_mean_z = nullptr, + phi::DenseTensor *cpu_var_z = nullptr, + phi::DenseTensor *cpu_saved_mean_z = nullptr, + phi::DenseTensor *cpu_saved_var_z = nullptr, + phi::DenseTensor *saved_reserve_space_z = nullptr) { InitMeanVar(cpu_mean_x, cpu_var_x, cpu_saved_mean_x, cpu_saved_var_x); ComputeBatchNormForward(ctx, cpu_x_, @@ -566,12 +567,12 @@ class CudnnBNAddReluTester { } void BaselineForwardFusedBNAddRelu(const phi::GPUContext &ctx, - Tensor *cpu_mean, - Tensor *cpu_var, - Tensor *cpu_saved_mean, - Tensor *cpu_saved_var, - Tensor *cpu_y, - Tensor *saved_reserve_space) { + phi::DenseTensor *cpu_mean, + phi::DenseTensor *cpu_var, + phi::DenseTensor *cpu_saved_mean, + phi::DenseTensor *cpu_saved_var, + phi::DenseTensor *cpu_y, + phi::DenseTensor *saved_reserve_space) { InitMeanVar(cpu_mean, cpu_var, cpu_saved_mean, cpu_saved_var); ComputeFusedBNAddReluForward(ctx, cpu_x_, @@ -587,10 +588,10 @@ class CudnnBNAddReluTester { } void BaselineBackwardFusedBNAddRelu(const phi::GPUContext &ctx, - Tensor *cpu_dx, - Tensor *cpu_dz, - Tensor *cpu_dscale, - Tensor *cpu_dbias) { + phi::DenseTensor *cpu_dx, + phi::DenseTensor *cpu_dz, + phi::DenseTensor *cpu_dscale, + phi::DenseTensor *cpu_dbias) { ComputeFusedBNAddReluBackward(ctx, cpu_dy_, cpu_x_, @@ -607,19 +608,19 @@ class CudnnBNAddReluTester { } void ComputeFusedBNStatsFinalize(const phi::GPUContext &ctx, - const Tensor &cpu_x, - const Tensor &cpu_bn_scale, - const Tensor &cpu_bn_bias, - Tensor *sum, - Tensor *sum_of_square, - Tensor *bn_scale, - Tensor *bn_bias, - Tensor *mean, - Tensor *var, - Tensor *saved_mean, - Tensor *saved_var, - Tensor *equiv_scale, - Tensor *equiv_bias) { + const phi::DenseTensor &cpu_x, + const phi::DenseTensor &cpu_bn_scale, + const phi::DenseTensor &cpu_bn_bias, + phi::DenseTensor *sum, + phi::DenseTensor *sum_of_square, + phi::DenseTensor *bn_scale, + phi::DenseTensor *bn_bias, + phi::DenseTensor *mean, + phi::DenseTensor *var, + phi::DenseTensor *saved_mean, + phi::DenseTensor *saved_var, + phi::DenseTensor *equiv_scale, + phi::DenseTensor *equiv_bias) { phi::DenseTensor cpu_sum; phi::DenseTensor cpu_sum_of_square; ComputeSumAndSquareSum(cpu_x, &cpu_sum, &cpu_sum_of_square); @@ -664,16 +665,16 @@ class CudnnBNAddReluTester { // Get forward results of CudnnBNStatsFinalize 
+ CudnnScaleBiasAddRelu void FusedForward(const phi::GPUContext &ctx, - Tensor *cpu_mean_x, - Tensor *cpu_var_x, - Tensor *cpu_saved_mean_x, - Tensor *cpu_saved_var_x, - Tensor *cpu_y, - Tensor *cpu_bitmask, - Tensor *cpu_mean_z = nullptr, - Tensor *cpu_var_z = nullptr, - Tensor *cpu_saved_mean_z = nullptr, - Tensor *cpu_saved_var_z = nullptr) { + phi::DenseTensor *cpu_mean_x, + phi::DenseTensor *cpu_var_x, + phi::DenseTensor *cpu_saved_mean_x, + phi::DenseTensor *cpu_saved_var_x, + phi::DenseTensor *cpu_y, + phi::DenseTensor *cpu_bitmask, + phi::DenseTensor *cpu_mean_z = nullptr, + phi::DenseTensor *cpu_var_z = nullptr, + phi::DenseTensor *cpu_saved_mean_z = nullptr, + phi::DenseTensor *cpu_saved_var_z = nullptr) { phi::DenseTensor x; phi::DenseTensor sum_x; phi::DenseTensor sum_of_square_x; @@ -802,10 +803,10 @@ class CudnnBNAddReluTester { // Get backward results of CudnnBNStatsFinalize + CudnnScaleBiasAddRelu void FusedBackward(const phi::GPUContext &ctx, - Tensor *cpu_dx, - Tensor *cpu_dz, - Tensor *cpu_dscale, - Tensor *cpu_dbias) { + phi::DenseTensor *cpu_dx, + phi::DenseTensor *cpu_dz, + phi::DenseTensor *cpu_dscale, + phi::DenseTensor *cpu_dbias) { phi::DenseTensor dy; phi::DenseTensor x; phi::DenseTensor bn_scale; diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h index 0325a0e585ed3..762e86406917d 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h +++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; namespace dynload = platform::dynload; template using BatchNormParamType = @@ -70,16 +69,16 @@ class CudnnBNStatsFinalize { ~CudnnBNStatsFinalize() {} void Forward(const phi::GPUContext &ctx, - const Tensor &sum, - const Tensor &sum_of_squares, - const Tensor &scale, - const Tensor &bias, - Tensor *saved_mean, - Tensor *saved_invstd, - Tensor *running_mean, - Tensor *running_var, - Tensor *equiv_scale, - Tensor *equiv_bias, + const phi::DenseTensor &sum, + const phi::DenseTensor &sum_of_squares, + const phi::DenseTensor &scale, + const phi::DenseTensor &bias, + phi::DenseTensor *saved_mean, + phi::DenseTensor *saved_invstd, + phi::DenseTensor *running_mean, + phi::DenseTensor *running_var, + phi::DenseTensor *equiv_scale, + phi::DenseTensor *equiv_bias, double eps, float momentum, int64_t ele_count, diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h index bf0e06b825e4b..c82ccc959d204 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h +++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; namespace dynload = platform::dynload; template @@ -195,11 +194,11 @@ class CudnnNormConvolution { ~CudnnNormConvolution() {} void Forward(const phi::GPUContext &ctx, - const Tensor &input, - const Tensor &filter, - Tensor *output, - Tensor *sum, - Tensor *sum_of_squares) { + const phi::DenseTensor &input, + const phi::DenseTensor &filter, + phi::DenseTensor *output, + phi::DenseTensor *sum, + phi::DenseTensor *sum_of_squares) { auto cudnn_handle = ctx.cudnn_handle(); CudnnFusionOp *fwd_op = GetForwardOp(ctx); @@ -314,11 +313,11 @@ class CudnnNormConvolutionGrad { ~CudnnNormConvolutionGrad() {} void Backward(const phi::GPUContext &ctx, - const Tensor &input, - const Tensor &filter, - const Tensor &output_grad, - Tensor *input_grad, - Tensor *filter_grad, + const phi::DenseTensor &input, + const phi::DenseTensor &filter, + const phi::DenseTensor &output_grad, + phi::DenseTensor *input_grad, + phi::DenseTensor *filter_grad, bool use_addto = false) { T *input_ptr = const_cast(input.data()); T *filter_ptr = const_cast(filter.data()); diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc index 3369a8ca4a9c5..4f7555aed8282 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc +++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc @@ -28,7 +28,6 @@ limitations under the License. */ namespace framework = paddle::framework; namespace platform = paddle::platform; namespace op = paddle::operators; -using Tensor = phi::DenseTensor; USE_OP_ITSELF(conv2d); USE_OP_ITSELF(conv2d_grad); @@ -95,9 +94,9 @@ void CheckOutput(const phi::DenseTensor &cpu_res, // Use Paddle conv2d op results as baseline void ComputeConv2DForward(const phi::GPUContext &ctx, - const Tensor &cpu_input, - const Tensor &cpu_filter, - Tensor *cpu_output, + const phi::DenseTensor &cpu_input, + const phi::DenseTensor &cpu_filter, + phi::DenseTensor *cpu_output, int stride, int padding) { framework::Scope scope; @@ -131,9 +130,9 @@ void ComputeConv2DForward(const phi::GPUContext &ctx, // Use Paddle conv2d_grad op results as baseline void ComputeConv2DBackward(const phi::GPUContext &ctx, - const Tensor &cpu_input, - const Tensor &cpu_filter, - const Tensor &cpu_output_grad, + const phi::DenseTensor &cpu_input, + const phi::DenseTensor &cpu_filter, + const phi::DenseTensor &cpu_output_grad, phi::DenseTensor *cpu_input_grad, phi::DenseTensor *cpu_filter_grad, int stride, diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h index df79ed758dbc5..4ecc5795ff41a 100644 --- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h +++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; namespace dynload = platform::dynload; @@ -117,14 +116,14 @@ class CudnnScaleBiasAddRelu { ~CudnnScaleBiasAddRelu() {} void Forward(const phi::GPUContext &ctx, - const Tensor &x, - const Tensor &x_scale, - const Tensor &x_bias, - const Tensor *z, - const Tensor *z_scale, - const Tensor *z_bias, - Tensor *out, - Tensor *bitmask) { + const phi::DenseTensor &x, + const phi::DenseTensor &x_scale, + const phi::DenseTensor &x_bias, + const phi::DenseTensor *z, + const phi::DenseTensor *z_scale, + const phi::DenseTensor *z_bias, + phi::DenseTensor *out, + phi::DenseTensor *bitmask) { ForwardInit(ctx); auto handle = ctx.cudnn_handle(); auto workspace_handle = ctx.cudnn_workspace_handle(); @@ -172,17 +171,17 @@ class CudnnScaleBiasAddRelu { } void Backward(const phi::GPUContext &ctx, - const Tensor &dy, - const Tensor &x, - const Tensor &scale, - const Tensor &bias, - const Tensor &saved_mean, - const Tensor &saved_invstd, - const Tensor *bitmask, - Tensor *dx, - Tensor *dz, - Tensor *dscale, - Tensor *dbias, + const phi::DenseTensor &dy, + const phi::DenseTensor &x, + const phi::DenseTensor &scale, + const phi::DenseTensor &bias, + const phi::DenseTensor &saved_mean, + const phi::DenseTensor &saved_invstd, + const phi::DenseTensor *bitmask, + phi::DenseTensor *dx, + phi::DenseTensor *dz, + phi::DenseTensor *dscale, + phi::DenseTensor *dbias, double eps) { BackwardInit(ctx); auto handle = ctx.cudnn_handle(); diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 11939a454b9a0..47459884cc544 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -27,8 +27,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class AttnDropoutParam { public: AttnDropoutParam() { diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 03c97ec345fb8..b05a63510e385 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusedAttentionOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index ef5087f0534e1..9454e589ec920 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -38,8 +38,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template static void AllReduce(phi::DenseTensor &tensor, // NOLINT const int ring_id, @@ -528,7 +526,7 @@ class FusedAttentionGradKernel : public framework::OpKernel { int input_size = dim_embed; bool add_residual = ctx.Attr("add_residual"); - Tensor d_residual; + phi::DenseTensor d_residual; T *d_residual_data = nullptr; if (add_residual) { d_residual.Resize(input_x_dims); @@ -728,8 +726,8 @@ class FusedAttentionGradKernel : public framework::OpKernel { if (add_residual) { // gradient accumulation - std::vector ins = {&d_residual, d_x}; - std::vector outs = {d_x}; + std::vector ins = {&d_residual, d_x}; + std::vector outs = {d_x}; phi::funcs::ElementwiseKernel( ctx.cuda_device_context(), ins, &outs, phi::funcs::AddFunctor()); } diff --git a/paddle/fluid/operators/fused/fused_attention_op_xpu.cc b/paddle/fluid/operators/fused/fused_attention_op_xpu.cc index 6bf2e3d80335f..bbfa48f1dca78 100644 --- a/paddle/fluid/operators/fused/fused_attention_op_xpu.cc +++ b/paddle/fluid/operators/fused/fused_attention_op_xpu.cc @@ -24,8 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FusedAttentionOpKernel : public framework::OpKernel { public: @@ -33,86 +31,88 @@ class FusedAttentionOpKernel : public framework::OpKernel { using XPUTypeT = typename XPUTypeTrait::Type; // inputs tensor - auto *input_x = ctx.Input("X"); + auto *input_x = ctx.Input("X"); const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); // shape [3, num_head, dim_head, dim_embed] - auto *qkv_weight = ctx.Input("QKVW"); + auto *qkv_weight = ctx.Input("QKVW"); // shape [3 , num_head, dim_head] - auto *qkv_bias = ctx.Input("QKVBias"); + auto *qkv_bias = ctx.Input("QKVBias"); // shape [batch_size, 1, 1, seq_len] - auto *src_mask = ctx.Input("SrcMask"); + auto *src_mask = ctx.Input("SrcMask"); // shape [dim_embed, dim_embed] - auto *out_linear_weight = ctx.Input("OutLinearW"); + auto *out_linear_weight = ctx.Input("OutLinearW"); // shape [dim_embed] - auto *out_linear_bias = ctx.Input("OutLinearBias"); + auto *out_linear_bias = ctx.Input("OutLinearBias"); - const Tensor *ln_scale = nullptr; - const Tensor *ln_bias = nullptr; + const phi::DenseTensor *ln_scale = nullptr; + const phi::DenseTensor *ln_bias = nullptr; float epsilon = 0.0f; if (pre_layer_norm) { - ln_scale = ctx.Input("LnScale"); - ln_bias = ctx.Input("LnBias"); + ln_scale = ctx.Input("LnScale"); + ln_bias = ctx.Input("LnBias"); epsilon = ctx.Attr("epsilon"); } else { - ln_scale = ctx.Input("Ln2Scale"); - ln_bias = ctx.Input("Ln2Bias"); + ln_scale = ctx.Input("Ln2Scale"); + ln_bias = ctx.Input("Ln2Bias"); epsilon = ctx.Attr("ln_epsilon"); } // outputs tensor // qkv 的值,并已经做了transpos后的值 // shape [3, batch_size, num_head, seq_len, dim_head] - auto *TransposeOut2 = ctx.Output("TransposeOut2"); + auto *TransposeOut2 = ctx.Output("TransposeOut2"); // shape [batch_size, num_head, seq_len, seq_len] - auto *softmax_out = ctx.Output("SoftmaxOut"); + auto *softmax_out = ctx.Output("SoftmaxOut"); // shape [batch_size, num_head, seq_len, seq_len] - auto *attn_dropout_mask_out = ctx.Output("AttnDropoutMaskOut"); + auto *attn_dropout_mask_out = + ctx.Output("AttnDropoutMaskOut"); // shape [batch_size, num_head, seq_len, seq_len] - auto *attn_dropout_out = ctx.Output("AttnDropoutOut"); + auto *attn_dropout_out = ctx.Output("AttnDropoutOut"); // shape [[batch_size, seq_len, num_head, dim_head]] - auto *fmha_out = 
ctx.Output("FMHAOut"); + auto *fmha_out = ctx.Output("FMHAOut"); // shape [batch_size, seq_len, dim_embed] - auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); + auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); // final output // shape [batch_size, seq_len, dim_embed] - auto *out = ctx.Output("Y"); + auto *out = ctx.Output("Y"); // 下面这个tensor是不需要返回, 但是新的动态图需要 - auto *QKOut = ctx.Output("QKOut"); + auto *QKOut = ctx.Output("QKOut"); QKOut->mutable_data(ctx.GetPlace()); - auto *QKTVOut = ctx.Output("QKTVOut"); + auto *QKTVOut = ctx.Output("QKTVOut"); QKTVOut->mutable_data(ctx.GetPlace()); - auto *OutLinearOut = ctx.Output("OutLinearOut"); + auto *OutLinearOut = ctx.Output("OutLinearOut"); OutLinearOut->mutable_data(ctx.GetPlace()); - auto *QKVBiasOut = ctx.Output("QKVBiasOut"); + auto *QKVBiasOut = ctx.Output("QKVBiasOut"); QKVBiasOut->mutable_data(ctx.GetPlace()); - auto *SrcMaskOut = ctx.Output("SrcMaskOut"); + auto *SrcMaskOut = ctx.Output("SrcMaskOut"); SrcMaskOut->mutable_data(ctx.GetPlace()); - auto *qkv_out = ctx.Output("QKVOut"); + auto *qkv_out = ctx.Output("QKVOut"); qkv_out->mutable_data(ctx.GetPlace()); - Tensor *bias_dropout_residual_out = nullptr; - Tensor *ln_mean = nullptr; - Tensor *ln_var = nullptr; - Tensor *ln_out = nullptr; + phi::DenseTensor *bias_dropout_residual_out = nullptr; + phi::DenseTensor *ln_mean = nullptr; + phi::DenseTensor *ln_var = nullptr; + phi::DenseTensor *ln_out = nullptr; if (pre_layer_norm) { - ln_mean = ctx.Output("LnMean"); - ln_var = ctx.Output("LnVariance"); - ln_out = ctx.Output("LnOut"); + ln_mean = ctx.Output("LnMean"); + ln_var = ctx.Output("LnVariance"); + ln_out = ctx.Output("LnOut"); } else { - ln_mean = ctx.Output("Ln2Mean"); - ln_var = ctx.Output("Ln2Variance"); - bias_dropout_residual_out = ctx.Output("BiasDropoutResidualOut"); + ln_mean = ctx.Output("Ln2Mean"); + ln_var = ctx.Output("Ln2Variance"); + bias_dropout_residual_out = + ctx.Output("BiasDropoutResidualOut"); } // dropout info @@ -125,7 +125,8 @@ class FusedAttentionOpKernel : public framework::OpKernel { bool is_upscale_in_train_1 = (dropout_implementation_1 == "upscale_in_train"); - auto *seed_1 = ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; + auto *seed_1 = + ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; bool is_fix_seed_1 = ctx.Attr("attn_dropout_fix_seed"); @@ -468,7 +469,8 @@ class FusedAttentionGradXPUKernel : public framework::OpKernel { ctx.Attr("attn_dropout_implementation"); bool is_upscale_in_train_1 = (dropout_implementation_1 == "upscale_in_train"); - auto *seed_1 = ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; + auto *seed_1 = + ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; bool is_fix_seed_1 = ctx.Attr("attn_dropout_fix_seed"); int seed_val_1 = ctx.Attr("attn_dropout_seed"); @@ -482,79 +484,81 @@ class FusedAttentionGradXPUKernel : public framework::OpKernel { XPUDropoutParam dropout_param(ctx, 0); // get inputs. 
- auto *d_y = ctx.Input(framework::GradVarName("Y")); + auto *d_y = ctx.Input(framework::GradVarName("Y")); const XPUTypeT *d_y_ptr = reinterpret_cast(d_y->data()); // 前向必要参数 - auto *input_x = ctx.Input("X"); + auto *input_x = ctx.Input("X"); const XPUTypeT *input_x_ptr = reinterpret_cast(input_x->data()); - auto *qkv_transpose_out = ctx.Input("TransposeOut2"); + auto *qkv_transpose_out = ctx.Input("TransposeOut2"); const XPUTypeT *qkv_transpose_out_ptr = reinterpret_cast(qkv_transpose_out->data()); - auto *qkv_weight = ctx.Input("QKVW"); + auto *qkv_weight = ctx.Input("QKVW"); const XPUTypeT *qkv_weight_ptr = reinterpret_cast(qkv_weight->data()); - auto *softmax_out = ctx.Input("SoftmaxOut"); + auto *softmax_out = ctx.Input("SoftmaxOut"); const XPUTypeT *softmax_out_ptr = reinterpret_cast(softmax_out->data()); - auto *attn_dropout_out = ctx.Input("AttnDropoutOut"); + auto *attn_dropout_out = ctx.Input("AttnDropoutOut"); const XPUTypeT *attn_dropout_out_ptr = reinterpret_cast(attn_dropout_out->data()); - auto *attn_dropout_mask = ctx.Input("AttnDropoutMaskOut"); + auto *attn_dropout_mask = ctx.Input("AttnDropoutMaskOut"); const XPUTypeT *attn_dropout_mask_ptr = reinterpret_cast(attn_dropout_mask->data()); - auto *fmha_out = ctx.Input("FMHAOut"); + auto *fmha_out = ctx.Input("FMHAOut"); const XPUTypeT *fmha_out_ptr = reinterpret_cast(fmha_out->data()); - auto *out_linear_weight = ctx.Input("OutLinearW"); + auto *out_linear_weight = ctx.Input("OutLinearW"); const XPUTypeT *out_linear_weight_ptr = reinterpret_cast(out_linear_weight->data()); - auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); + auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); const XPUTypeT *dropout_mask_out_ptr = reinterpret_cast(dropout_mask_out->data()); // 需要计算的梯度 - auto *d_qkv_weight = ctx.Output(framework::GradVarName("QKVW")); + auto *d_qkv_weight = + ctx.Output(framework::GradVarName("QKVW")); XPUTypeT *d_qkv_weight_ptr = reinterpret_cast( d_qkv_weight->mutable_data(ctx.GetPlace())); - auto *d_qkv_bias = ctx.Output(framework::GradVarName("QKVBias")); + auto *d_qkv_bias = + ctx.Output(framework::GradVarName("QKVBias")); XPUTypeT *d_qkv_bias_ptr = reinterpret_cast( d_qkv_bias->mutable_data(ctx.GetPlace())); auto *d_out_linear_weight = - ctx.Output(framework::GradVarName("OutLinearW")); + ctx.Output(framework::GradVarName("OutLinearW")); XPUTypeT *d_out_linear_weight_ptr = reinterpret_cast( d_out_linear_weight->mutable_data(ctx.GetPlace())); auto *d_out_linear_bias = - ctx.Output(framework::GradVarName("OutLinearBias")); + ctx.Output(framework::GradVarName("OutLinearBias")); XPUTypeT *d_out_linear_bias_ptr = reinterpret_cast( d_out_linear_bias->mutable_data(ctx.GetPlace())); // 有可能需要 auto *d_src_mask_out = - ctx.Output(framework::GradVarName("SrcMaskOut")); + ctx.Output(framework::GradVarName("SrcMaskOut")); XPUTypeT *d_src_mask_out_ptr = (d_src_mask_out == nullptr) ? 
(nullptr) : (reinterpret_cast( d_src_mask_out->mutable_data(ctx.GetPlace()))); // 输出 dx - auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_x = ctx.Output(framework::GradVarName("X")); XPUTypeT *d_x_ptr = reinterpret_cast(d_x->mutable_data(ctx.GetPlace())); - const Tensor *ln_out = nullptr; - const Tensor *bias_dropout_residual_out = nullptr; - const Tensor *ln_scale = nullptr; - const Tensor *ln_mean = nullptr; - const Tensor *ln_var = nullptr; - Tensor *d_ln_scale = nullptr; - Tensor *d_ln_bias = nullptr; + const phi::DenseTensor *ln_out = nullptr; + const phi::DenseTensor *bias_dropout_residual_out = nullptr; + const phi::DenseTensor *ln_scale = nullptr; + const phi::DenseTensor *ln_mean = nullptr; + const phi::DenseTensor *ln_var = nullptr; + phi::DenseTensor *d_ln_scale = nullptr; + phi::DenseTensor *d_ln_bias = nullptr; const XPUTypeT *ln_out_ptr = NULL; const float *ln_scale_ptr = NULL; @@ -567,23 +571,28 @@ class FusedAttentionGradXPUKernel : public framework::OpKernel { float epsilon = 0.0f; if (pre_layer_norm) { - ln_out = ctx.Input("LnOut"); + ln_out = ctx.Input("LnOut"); ln_out_ptr = reinterpret_cast(ln_out->data()); - ln_scale = ctx.Input("LnScale"); - ln_mean = ctx.Input("LnMean"); - ln_var = ctx.Input("LnVariance"); + ln_scale = ctx.Input("LnScale"); + ln_mean = ctx.Input("LnMean"); + ln_var = ctx.Input("LnVariance"); epsilon = ctx.Attr("epsilon"); - d_ln_scale = ctx.Output(framework::GradVarName("LnScale")); - d_ln_bias = ctx.Output(framework::GradVarName("LnBias")); + d_ln_scale = + ctx.Output(framework::GradVarName("LnScale")); + d_ln_bias = + ctx.Output(framework::GradVarName("LnBias")); } else { - ln_scale = ctx.Input("Ln2Scale"); - ln_mean = ctx.Input("Ln2Mean"); - ln_var = ctx.Input("Ln2Variance"); + ln_scale = ctx.Input("Ln2Scale"); + ln_mean = ctx.Input("Ln2Mean"); + ln_var = ctx.Input("Ln2Variance"); epsilon = ctx.Attr("ln_epsilon"); - d_ln_scale = ctx.Output(framework::GradVarName("Ln2Scale")); - d_ln_bias = ctx.Output(framework::GradVarName("Ln2Bias")); - bias_dropout_residual_out = ctx.Input("BiasDropoutResidualOut"); + d_ln_scale = + ctx.Output(framework::GradVarName("Ln2Scale")); + d_ln_bias = + ctx.Output(framework::GradVarName("Ln2Bias")); + bias_dropout_residual_out = + ctx.Input("BiasDropoutResidualOut"); bias_dropout_residual_out_ptr = reinterpret_cast( bias_dropout_residual_out->data()); } diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc index 94131197060b5..02494e33e1241 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu index 664e20b686d7e..2562c2cc22575 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu @@ -25,8 +25,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index faf4a1aae44b6..e68be43eb7ec0 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -304,9 +304,9 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType( PADDLE_THROW(platform::errors::NotFound( "Can not find Y@GRAD in the execution context.")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index c7fbdc88abb33..4023aaa8445f9 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -30,7 +30,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template @@ -143,7 +142,7 @@ class FusedBatchNormActKernel size_t reserve_space_size = 0; void *reserve_space_ptr = nullptr; void *workspace_ptr = nullptr; - Tensor workspace_tensor; + phi::DenseTensor workspace_tensor; // Create reserve space and workspace for batch norm. // Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. @@ -340,7 +339,7 @@ class FusedBatchNormActGradKernel size_t workspace_size = 0; void *workspace_ptr = nullptr; - Tensor workspace_tensor; + phi::DenseTensor workspace_tensor; auto reserve_space_size = reserve_space->memory_size(); cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ACTIVATION; platform::ScopedActivationDescriptor scope_act_desc; diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.h b/paddle/fluid/operators/fused/fused_bn_activation_op.h index f8aab994cb371..b71812db9d3d3 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.h @@ -26,7 +26,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class FusedBatchNormActOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index 2d51a3efaf699..08f7087b48d01 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -262,9 +262,9 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType( PADDLE_THROW(platform::errors::NotFound( "Can not find Y@GRAD in the execution context.")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index 5a192b2df5c94..4c4756b8e1979 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -30,7 +30,6 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template @@ -120,7 +119,7 @@ class FusedBatchNormAddActKernel size_t reserve_space_size = 0; void *reserve_space_ptr = nullptr; void *workspace_ptr = nullptr; - Tensor workspace_tensor; + phi::DenseTensor workspace_tensor; // Create reserve space and workspace for batch norm. // Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. @@ -296,7 +295,7 @@ class FusedBatchNormAddActGradKernel size_t workspace_size = 0; void *workspace_ptr = nullptr; - Tensor workspace_tensor; + phi::DenseTensor workspace_tensor; auto reserve_space_size = reserve_space->memory_size(); cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; platform::ScopedActivationDescriptor scope_act_desc; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h index f4913bca3df98..bdb1f2f35444c 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -26,7 +26,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class FusedBatchNormAddActOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu index 8360f07a5f3e7..b8f2cc5b4b335 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu @@ -33,7 +33,6 @@ template class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = phi::DenseTensor; auto &device_ctx = context.template device_context(); auto ids = context.MultiInput("Ids"); auto embs = context.MultiInput("Embs"); diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc index 9c58c6900959e..885f3412a4e06 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc @@ -182,16 +182,17 @@ void FusedEmbeddingFCLSTMOpMaker::Make() { "contains the ids to be looked up in W. " "The last dimension size must be 1."); AddInput("Embeddings", - "(Tensor) the learnable weights of X." + "(phi::DenseTensor) the learnable weights of X." " - The shape is (M x 4D), where M is the dim size of x, D is the " "hidden size. " " - Weight = {W_cx, W_ix, W_fx, W_ox}"); - AddInput("WeightH", - "(Tensor) same as LSTMOp, the learnable hidden-hidden weights." - " - The shape is (D x 4D), where D is the hidden size. " - " - Weight = {W_ch, W_ih, W_fh, W_oh}"); + AddInput( + "WeightH", + "(phi::DenseTensor) same as LSTMOp, the learnable hidden-hidden weights." + " - The shape is (D x 4D), where D is the hidden size. " + " - Weight = {W_ch, W_ih, W_fh, W_oh}"); AddInput("Bias", - "(Tensor) the learnable weights. Almost same as LSTMOp" + "(phi::DenseTensor) the learnable weights. Almost same as LSTMOp" "Note: we should add the fc bias into this (1x4D) in bias." "input-hidden bias weight and peephole connections weight if " "setting `use_peepholes` True. " @@ -202,13 +203,15 @@ void FusedEmbeddingFCLSTMOpMaker::Make() { " - The shape is (1 x 7D). " " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); AddInput("H0", - "(Tensor, optional) (same as LSTMOp) the initial hidden state is an " + "(phi::DenseTensor, optional) (same as LSTMOp) the initial hidden " + "state is an " "optional " "input. This is a tensor with shape (N x D), where N is the " "batch size and D is the hidden size.") .AsDispensable(); AddInput("C0", - "(Tensor, optional) (same as LSTMOp) (the initial cell state is an " + "(phi::DenseTensor, optional) (same as LSTMOp) (the initial cell " + "state is an " "optional " "input. This is a tensor with shape (N x D), where N is the " "batch size. 
`H0` and `C0` can be NULL but only at the same time.") @@ -318,7 +321,7 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { /* diagonal weight*/ \ const T* wc_data = bias->data() + D4; \ /* for peephole only*/ \ - Tensor checked_cell; \ + phi::DenseTensor checked_cell; \ T* checked_cell_data = nullptr; \ auto place = ctx.GetPlace(); \ if (use_peepholes) { \ diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h index 181fa06b02034..19039ec55946d 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusedEmbeddingFCLSTMOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index 0e4134d428094..9fa62a3704547 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -28,7 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; @@ -175,7 +174,7 @@ class FusedEmbeddingSeqPoolKernel : public framework::OpKernel { auto len = ids_t->numel(); int idx_width = len / offset.back(); - Tensor csr_vals_t, csr_colmuns_t, csr_row_idx_t; + phi::DenseTensor csr_vals_t, csr_colmuns_t, csr_row_idx_t; csr_vals_t.Resize({len}); csr_colmuns_t.Resize({len}); csr_row_idx_t.Resize({(batch_size + 1) * idx_width}); @@ -300,7 +299,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel { auto len = ids->numel(); int idx_width = len / offset.back(); - Tensor csr_vals_t, csr_colmuns_t, csr_row_idx_t; + phi::DenseTensor csr_vals_t, csr_colmuns_t, csr_row_idx_t; csr_vals_t.Resize({len}); csr_colmuns_t.Resize({len}); int64_t batch_size = ids_lod[0].size() - 1; diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index aaf84c7b1eadb..3bf039829ac3d 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class FusedFeedForwardOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu index 669672084b52b..28a9cb167e093 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cu +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu @@ -31,8 +31,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template static void AllReduce(phi::DenseTensor& tensor, // NOLINT const int ring_id, diff --git a/paddle/fluid/operators/fused/fused_feedforward_op_xpu.cc b/paddle/fluid/operators/fused/fused_feedforward_op_xpu.cc index b94d37a921fb6..4b9ba95143345 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op_xpu.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op_xpu.cc @@ -26,30 +26,28 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FusedFeedForwardXPUKernel : public framework::OpKernel { using XPUTypeT = typename XPUTypeTrait::Type; public: void FFN(const phi::XPUContext& dev_ctx, - const Tensor* x, - const Tensor* linear1_weight, - const Tensor* linear1_bias, - const Tensor* linear2_weight, - const Tensor* linear2_bias, - const Tensor* ln_scale, - const Tensor* ln_bias, - Tensor* out, - Tensor* dropout1_mask, - Tensor* dropout2_mask, - Tensor* ln_mean, - Tensor* ln_variance, - Tensor* linear1_out, - Tensor* ln1_out, - Tensor* dropout1_out, - Tensor* dropout2_out, + const phi::DenseTensor* x, + const phi::DenseTensor* linear1_weight, + const phi::DenseTensor* linear1_bias, + const phi::DenseTensor* linear2_weight, + const phi::DenseTensor* linear2_bias, + const phi::DenseTensor* ln_scale, + const phi::DenseTensor* ln_bias, + phi::DenseTensor* out, + phi::DenseTensor* dropout1_mask, + phi::DenseTensor* dropout2_mask, + phi::DenseTensor* ln_mean, + phi::DenseTensor* ln_variance, + phi::DenseTensor* linear1_out, + phi::DenseTensor* ln1_out, + phi::DenseTensor* dropout1_out, + phi::DenseTensor* dropout2_out, const int bsz_seq, const int d_model, const int dim_feedforward, @@ -255,41 +253,41 @@ class FusedFeedForwardXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto place = context.GetPlace(); - auto* x = context.Input("X"); + auto* x = context.Input("X"); - auto* linear1_weight = context.Input("Linear1Weight"); - auto* linear1_bias = context.Input("Linear1Bias"); - auto* linear2_weight = context.Input("Linear2Weight"); - auto* linear2_bias = context.Input("Linear2Bias"); + auto* linear1_weight = context.Input("Linear1Weight"); + auto* linear1_bias = context.Input("Linear1Bias"); + auto* linear2_weight = context.Input("Linear2Weight"); + auto* linear2_bias = context.Input("Linear2Bias"); const bool pre_layer_norm = context.Attr("pre_layer_norm"); - const Tensor* ln_scale = nullptr; - const Tensor* ln_bias = nullptr; - Tensor* ln_mean = nullptr; - Tensor* ln_variance = nullptr; - Tensor* ln1_out = nullptr; + const phi::DenseTensor* ln_scale = nullptr; + const phi::DenseTensor* ln_bias = nullptr; + phi::DenseTensor* ln_mean = nullptr; + phi::DenseTensor* ln_variance = nullptr; + phi::DenseTensor* ln1_out = nullptr; if (pre_layer_norm) { - ln_scale = context.Input("Ln1Scale"); - ln_bias = context.Input("Ln1Bias"); - ln_mean = context.Output("Ln1Mean"); - ln_variance = context.Output("Ln1Variance"); - ln1_out = context.Output("Ln1Out"); + ln_scale = context.Input("Ln1Scale"); + ln_bias = context.Input("Ln1Bias"); + ln_mean = context.Output("Ln1Mean"); + ln_variance = context.Output("Ln1Variance"); + ln1_out = context.Output("Ln1Out"); ln1_out->mutable_data(place); } else { - ln_scale = context.Input("Ln2Scale"); - ln_bias = context.Input("Ln2Bias"); - ln_mean = context.Output("Ln2Mean"); - ln_variance = context.Output("Ln2Variance"); + ln_scale = context.Input("Ln2Scale"); + ln_bias = context.Input("Ln2Bias"); + ln_mean = context.Output("Ln2Mean"); + ln_variance = context.Output("Ln2Variance"); } - auto* out = context.Output("Out"); - auto* dropout1_mask = context.Output("Dropout1Mask"); - auto* dropout2_mask = context.Output("Dropout2Mask"); - auto* linear1_out = context.Output("Linear1Out"); + auto* out = context.Output("Out"); + auto* dropout1_mask = context.Output("Dropout1Mask"); + auto* dropout2_mask = context.Output("Dropout2Mask"); + auto* linear1_out = 
context.Output("Linear1Out"); - auto* dropout1_out = context.Output("Dropout1Out"); - auto* dropout2_out = context.Output("Dropout2Out"); + auto* dropout1_out = context.Output("Dropout1Out"); + auto* dropout2_out = context.Output("Dropout2Out"); const std::string act_method = context.Attr("act_method"); @@ -356,26 +354,26 @@ class FusedFeedForwardGradXPUKernel : public framework::OpKernel { public: void FFNGrad(const phi::XPUContext& dev_ctx, - const Tensor* d_out, - const Tensor* x, - const Tensor* dropout1_mask, - const Tensor* dropout2_mask, - const Tensor* linear1_out, - const Tensor* ln1_out, - const Tensor* dropout1_out, - const Tensor* dropout2_out, - const Tensor* linear1_weight, - const Tensor* linear2_weight, - const Tensor* ln_scale, - const Tensor* ln_mean, - const Tensor* ln_variance, - Tensor* d_x, - Tensor* d_linear1_weight, - Tensor* d_linear1_bias, - Tensor* d_linear2_weight, - Tensor* d_linear2_bias, - Tensor* d_ln_scale, - Tensor* d_ln_bias, + const phi::DenseTensor* d_out, + const phi::DenseTensor* x, + const phi::DenseTensor* dropout1_mask, + const phi::DenseTensor* dropout2_mask, + const phi::DenseTensor* linear1_out, + const phi::DenseTensor* ln1_out, + const phi::DenseTensor* dropout1_out, + const phi::DenseTensor* dropout2_out, + const phi::DenseTensor* linear1_weight, + const phi::DenseTensor* linear2_weight, + const phi::DenseTensor* ln_scale, + const phi::DenseTensor* ln_mean, + const phi::DenseTensor* ln_variance, + phi::DenseTensor* d_x, + phi::DenseTensor* d_linear1_weight, + phi::DenseTensor* d_linear1_bias, + phi::DenseTensor* d_linear2_weight, + phi::DenseTensor* d_linear2_bias, + phi::DenseTensor* d_ln_scale, + phi::DenseTensor* d_ln_bias, const int bsz_seq, const int d_model, const int dim_feedforward, @@ -696,55 +694,61 @@ class FusedFeedForwardGradXPUKernel : public framework::OpKernel { auto place = context.GetPlace(); const bool pre_layer_norm = context.Attr("pre_layer_norm"); // inputs - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* x = context.Input("X"); + auto* d_out = + context.Input(framework::GradVarName("Out")); + auto* x = context.Input("X"); - auto* dropout1_mask = context.Input("Dropout1Mask"); - auto* dropout2_mask = context.Input("Dropout2Mask"); - auto* linear1_out = context.Input("Linear1Out"); - auto* ln1_out = pre_layer_norm ? context.Input("Ln1Out") : nullptr; + auto* dropout1_mask = context.Input("Dropout1Mask"); + auto* dropout2_mask = context.Input("Dropout2Mask"); + auto* linear1_out = context.Input("Linear1Out"); + auto* ln1_out = + pre_layer_norm ? 
context.Input("Ln1Out") : nullptr; - auto* dropout1_out = context.Input("Dropout1Out"); - auto* dropout2_out = context.Input("Dropout2Out"); - auto* linear1_weight = context.Input("Linear1Weight"); - auto* linear2_weight = context.Input("Linear2Weight"); + auto* dropout1_out = context.Input("Dropout1Out"); + auto* dropout2_out = context.Input("Dropout2Out"); + auto* linear1_weight = context.Input("Linear1Weight"); + auto* linear2_weight = context.Input("Linear2Weight"); - const Tensor* ln_mean = nullptr; - const Tensor* ln_variance = nullptr; - const Tensor* ln_scale = nullptr; + const phi::DenseTensor* ln_mean = nullptr; + const phi::DenseTensor* ln_variance = nullptr; + const phi::DenseTensor* ln_scale = nullptr; if (pre_layer_norm) { - ln_mean = context.Input("Ln1Mean"); - ln_variance = context.Input("Ln1Variance"); - ln_scale = context.Input("Ln1Scale"); + ln_mean = context.Input("Ln1Mean"); + ln_variance = context.Input("Ln1Variance"); + ln_scale = context.Input("Ln1Scale"); } else { - ln_mean = context.Input("Ln2Mean"); - ln_variance = context.Input("Ln2Variance"); - ln_scale = context.Input("Ln2Scale"); + ln_mean = context.Input("Ln2Mean"); + ln_variance = context.Input("Ln2Variance"); + ln_scale = context.Input("Ln2Scale"); } // output - auto* d_x = context.Output(framework::GradVarName("X")); + auto* d_x = context.Output(framework::GradVarName("X")); - Tensor* d_ln_scale = nullptr; - Tensor* d_ln_bias = nullptr; + phi::DenseTensor* d_ln_scale = nullptr; + phi::DenseTensor* d_ln_bias = nullptr; if (pre_layer_norm) { - d_ln_scale = context.Output(framework::GradVarName("Ln1Scale")); - d_ln_bias = context.Output(framework::GradVarName("Ln1Bias")); + d_ln_scale = + context.Output(framework::GradVarName("Ln1Scale")); + d_ln_bias = + context.Output(framework::GradVarName("Ln1Bias")); } else { - d_ln_scale = context.Output(framework::GradVarName("Ln2Scale")); - d_ln_bias = context.Output(framework::GradVarName("Ln2Bias")); + d_ln_scale = + context.Output(framework::GradVarName("Ln2Scale")); + d_ln_bias = + context.Output(framework::GradVarName("Ln2Bias")); } - auto* d_linear1_weight = - context.Output(framework::GradVarName("Linear1Weight")); + auto* d_linear1_weight = context.Output( + framework::GradVarName("Linear1Weight")); auto* d_linear1_bias = - context.Output(framework::GradVarName("Linear1Bias")); - auto* d_linear2_weight = - context.Output(framework::GradVarName("Linear2Weight")); + context.Output(framework::GradVarName("Linear1Bias")); + auto* d_linear2_weight = context.Output( + framework::GradVarName("Linear2Weight")); auto* d_linear2_bias = - context.Output(framework::GradVarName("Linear2Bias")); + context.Output(framework::GradVarName("Linear2Bias")); float epsilon = 0.0f; if (pre_layer_norm) { diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h index d55d047009255..b7611eff765d2 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention.h +++ b/paddle/fluid/operators/fused/fused_gate_attention.h @@ -24,8 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - inline std::string MemoryDebugString(const phi::DenseTensor& t) { int device_id = platform::GetCurrentDeviceId(); int64_t allocated = @@ -233,17 +231,17 @@ struct GateAttentionConfig { } protected: - Tensor qkv_out; - Tensor query_out; - Tensor key_out; - Tensor value_out; + phi::DenseTensor qkv_out; + phi::DenseTensor query_out; + phi::DenseTensor key_out; + phi::DenseTensor value_out; // qk_out = BatchedGEMM(Q, K^T) // qk_out: shape=[batch_size, seq_len_m, num_heads, seq_len_r, m_size] // softmax_out = softmax(qk_out + nonbatched_bias + src_mask) // The shape of qk_out, softmax_out is the same, thus can be called inplace. - Tensor qk_out; + phi::DenseTensor qk_out; // qktv_out may reuse gate_out. - Tensor qktv_out; + phi::DenseTensor qktv_out; }; template @@ -312,11 +310,11 @@ struct GateAttentionGradConfig : public GateAttentionConfig { } protected: - Tensor qkv_out_grad; - Tensor query_out_grad; - Tensor key_out_grad; - Tensor value_out_grad; - Tensor qk_out_grad; + phi::DenseTensor qkv_out_grad; + phi::DenseTensor query_out_grad; + phi::DenseTensor key_out_grad; + phi::DenseTensor value_out_grad; + phi::DenseTensor qk_out_grad; }; template @@ -461,10 +459,10 @@ class FMHAGateRef { T* k_grad_ptr = nullptr; T* v_grad_ptr = nullptr; - Tensor q_transpose_out_grad; - Tensor k_transpose_out_grad; - Tensor v_transpose_out_grad; - Tensor qkv_transpose_out_grad; + phi::DenseTensor q_transpose_out_grad; + phi::DenseTensor k_transpose_out_grad; + phi::DenseTensor v_transpose_out_grad; + phi::DenseTensor qkv_transpose_out_grad; if (merge_qkv_) { PADDLE_ENFORCE_NOT_NULL( qkv_transpose_out, @@ -513,7 +511,7 @@ class FMHAGateRef { v_transpose_out_grad.numel() * sizeof(T)); } - Tensor softmax_out_grad; + phi::DenseTensor softmax_out_grad; softmax_out_grad.Resize(config->softmax_out_dims); AllocWithDebugInfo(dev_ctx_, "softmax_out_grad", &softmax_out_grad); @@ -521,7 +519,7 @@ class FMHAGateRef { config->batch_size * config->seq_len_m * config->num_heads; { // Forward: fmha_out = transpose(qktv_out) - Tensor qktv_out_grad; + phi::DenseTensor qktv_out_grad; qktv_out_grad.Resize(config->qktv_out_dims); AllocWithDebugInfo(dev_ctx_, "qktv_out_grad", &qktv_out_grad); ComputeQKTVTransposeBackward(*fmha_out_grad, &qktv_out_grad); diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cc b/paddle/fluid/operators/fused/fused_gate_attention_op.cc index ce7929c39ffa8..c91bca47cf42f 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; class FusedGateAttentionOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu index 9cb3f19ab1740..8ca6cdb46ccd9 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu @@ -22,8 +22,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct SigmoidMultiplyFunctor { using MPType = typename phi::dtype::MPTypeTrait::Type; @@ -64,8 +62,8 @@ struct SigmoidMultiplyGradFunctor { template void ComputeMergedQKVMatmulForward(const framework::ExecutionContext &ctx, const GateAttentionConfig &config, - const Tensor *query, - Tensor *qkv_out) { + const phi::DenseTensor *query, + phi::DenseTensor *qkv_out) { // query: shape=[batch_size, seq_len_m, seq_len_r, qkv_dim] // qkv_weight: shape=[3, num_heads, head_dim, qkv_dim] // qkv_out: shape=[batch_size, seq_len_m, seq_len_r, 3, num_heads, head_dim] @@ -83,9 +81,9 @@ void ComputeMergedQKVMatmulForward(const framework::ExecutionContext &ctx, template void ComputeMergedQKVMatmulBackward(const framework::ExecutionContext &ctx, const GateAttentionGradConfig &config, - const Tensor *query, - const Tensor *qkv_out_grad, - Tensor *query_grad, + const phi::DenseTensor *query, + const phi::DenseTensor *qkv_out_grad, + phi::DenseTensor *query_grad, bool use_addto) { auto *qkv_weight = ctx.Input("QKVWeight"); auto *qkv_weight_grad = @@ -111,11 +109,11 @@ void ComputeMergedQKVMatmulBackward(const framework::ExecutionContext &ctx, template void ComputeSeparatedQKVMatmulForward(const framework::ExecutionContext &ctx, const GateAttentionConfig &config, - const Tensor *query, - const Tensor *key, - Tensor *query_out, - Tensor *key_out, - Tensor *value_out) { + const phi::DenseTensor *query, + const phi::DenseTensor *key, + phi::DenseTensor *query_out, + phi::DenseTensor *key_out, + phi::DenseTensor *value_out) { auto *query_weight = ctx.Input("QueryWeight"); auto *key_weight = ctx.Input("KeyWeight"); auto *value_weight = ctx.Input("ValueWeight"); @@ -149,13 +147,13 @@ void ComputeSeparatedQKVMatmulForward(const framework::ExecutionContext &ctx, template void ComputeSeparatedQKVMatmulBackward(const framework::ExecutionContext &ctx, const GateAttentionGradConfig &config, - const Tensor *query, - const Tensor *key, - const Tensor *query_out_grad, - const Tensor *key_out_grad, - const Tensor *value_out_grad, - Tensor *query_grad, - Tensor *key_grad, + const phi::DenseTensor *query, + const phi::DenseTensor *key, + const phi::DenseTensor *query_out_grad, + const phi::DenseTensor *key_out_grad, + const phi::DenseTensor *value_out_grad, + phi::DenseTensor *query_grad, + phi::DenseTensor *key_grad, bool use_addto) { // Gradient of GEMM(key, k_weight) const auto *key_weight = ctx.Input("KeyWeight"); @@ -209,9 +207,9 @@ void ComputeSeparatedQKVMatmulBackward(const framework::ExecutionContext &ctx, template void ComputeGatingLinearForward(const framework::ExecutionContext &ctx, const GateAttentionConfig &config, - const Tensor *query, - const Tensor *fmha_out, - Tensor *gate_out) { + const phi::DenseTensor *query, + const phi::DenseTensor *fmha_out, + phi::DenseTensor *gate_out) { auto *gate_weight = ctx.Input("GateWeight"); auto *gate_bias = ctx.Input("GateBias"); @@ -228,8 +226,8 @@ void ComputeGatingLinearForward(const framework::ExecutionContext &ctx, gate_weight, query, gate_bias, gate_out, gate_out); // gate_out = sigmoid(gate_out) * fmha_out - std::vector ins = {gate_out, fmha_out}; - std::vector outs = {gate_out}; + std::vector ins = {gate_out, fmha_out}; + std::vector outs = {gate_out}; phi::funcs::ElementwiseKernel( ctx.cuda_device_context(), ins, &outs, SigmoidMultiplyFunctor()); } @@ -237,16 +235,16 @@ void ComputeGatingLinearForward(const framework::ExecutionContext &ctx, template void 
ComputeGatingLinearBackward(const framework::ExecutionContext &ctx, const GateAttentionGradConfig &config, - const Tensor *query, - const Tensor *fmha_out, - const Tensor *gate_out_grad, - Tensor *query_grad, - Tensor *fmha_out_grad) { + const phi::DenseTensor *query, + const phi::DenseTensor *fmha_out, + const phi::DenseTensor *gate_out_grad, + phi::DenseTensor *query_grad, + phi::DenseTensor *fmha_out_grad) { const auto *gate_weight = ctx.Input("GateWeight"); const auto *gate_bias = ctx.Input("GateBias"); auto &dev_ctx = ctx.template device_context(); // Re-compute gate_bias_out - Tensor gate_bias_out; + phi::DenseTensor gate_bias_out; gate_bias_out.Resize(config.gate_out_dims); dev_ctx.Alloc(&gate_bias_out, gate_bias_out.numel() * sizeof(T)); @@ -260,8 +258,9 @@ void ComputeGatingLinearBackward(const framework::ExecutionContext &ctx, // Gradient of sigmoid(gate_bias_out) * fmha_out // Compute inplace and save gate_bias_out_grad to gate_bias_out. - std::vector ins = {gate_out_grad, &gate_bias_out, fmha_out}; - std::vector outs = {&gate_bias_out, fmha_out_grad}; + std::vector ins = { + gate_out_grad, &gate_bias_out, fmha_out}; + std::vector outs = {&gate_bias_out, fmha_out_grad}; phi::funcs::ElementwiseKernel, 2>( ctx.cuda_device_context(), ins, &outs, SigmoidMultiplyGradFunctor()); @@ -284,8 +283,8 @@ void ComputeGatingLinearBackward(const framework::ExecutionContext &ctx, template void ComputeOutputLinearForward(const framework::ExecutionContext &ctx, const GateAttentionConfig &config, - const Tensor *fmha_or_gate_out, - Tensor *out) { + const phi::DenseTensor *fmha_or_gate_out, + phi::DenseTensor *out) { const auto *out_linear_weight = ctx.Input("OutLinearWeight"); const auto *out_linear_bias = ctx.Input("OutLinearBias"); @@ -303,8 +302,8 @@ void ComputeOutputLinearForward(const framework::ExecutionContext &ctx, template void ComputeOutputLinearBackward(const framework::ExecutionContext &ctx, const GateAttentionGradConfig &config, - const Tensor *input, - Tensor *input_grad) { + const phi::DenseTensor *input, + phi::DenseTensor *input_grad) { auto &dev_ctx = ctx.template device_context(); const auto *out_grad = ctx.Input(framework::GradVarName("Out")); @@ -382,15 +381,15 @@ class FusedGateAttentionOpKernel : public framework::OpKernel { query)); // 1. Merged QKV Matmul: einsum(nbhqk,nbkhc -> nbqhc) - Tensor *qkv_out = config.GetQKVOut(); + phi::DenseTensor *qkv_out = config.GetQKVOut(); ComputeMergedQKVMatmulForward(ctx, config, query, qkv_out); AllocWithDebugInfo(dev_ctx, "qkv_transpose_out", qkv_transpose_out); } else { // 1. Separated QKV Matmul - Tensor *query_out = config.GetQueryOut(); - Tensor *key_out = config.GetKeyOut(); - Tensor *value_out = config.GetValueOut(); + phi::DenseTensor *query_out = config.GetQueryOut(); + phi::DenseTensor *key_out = config.GetKeyOut(); + phi::DenseTensor *value_out = config.GetValueOut(); ComputeSeparatedQKVMatmulForward( ctx, config, query, key, query_out, key_out, value_out); @@ -418,7 +417,7 @@ class FusedGateAttentionOpKernel : public framework::OpKernel { } // 4. Output Linear - Tensor *fmha_or_gate_out = has_gating ? gate_out : fmha_out; + phi::DenseTensor *fmha_or_gate_out = has_gating ? 
gate_out : fmha_out; ComputeOutputLinearForward(ctx, config, fmha_or_gate_out, out); } }; @@ -461,12 +460,12 @@ class FusedGateAttentionGradKernel : public framework::OpKernel { GateAttentionGradConfig config( dev_ctx, query, key, query_weight, qkv_weight, merge_qkv, has_gating); - Tensor fmha_out_grad; + phi::DenseTensor fmha_out_grad; fmha_out_grad.Resize(config.gate_out_dims); AllocWithDebugInfo(dev_ctx, "fmha_out_grad", &fmha_out_grad); if (has_gating) { // 1. Gradient of Output Linear: out = Linear(gate_out) - Tensor gate_out_grad; + phi::DenseTensor gate_out_grad; gate_out_grad.Resize(config.gate_out_dims); AllocWithDebugInfo(dev_ctx, "gate_out_grad", &gate_out_grad); ComputeOutputLinearBackward(ctx, config, gate_out, &gate_out_grad); @@ -505,7 +504,7 @@ class FusedGateAttentionGradKernel : public framework::OpKernel { bool use_addto = has_gating ? true : false; if (merge_qkv) { // 4. Gradient of Merged QKV Matmul - Tensor *qkv_out_grad = config.GetQKVOutGrad(); + phi::DenseTensor *qkv_out_grad = config.GetQKVOutGrad(); ComputeMergedQKVMatmulBackward( ctx, config, query, qkv_out_grad, query_grad, use_addto); } else { @@ -515,9 +514,9 @@ class FusedGateAttentionGradKernel : public framework::OpKernel { if (key_grad) { AllocWithDebugInfo(dev_ctx, "key_grad", key_grad); } - Tensor *query_out_grad = config.GetQueryOutGrad(); - Tensor *key_out_grad = config.GetKeyOutGrad(); - Tensor *value_out_grad = config.GetValueOutGrad(); + phi::DenseTensor *query_out_grad = config.GetQueryOutGrad(); + phi::DenseTensor *key_out_grad = config.GetKeyOutGrad(); + phi::DenseTensor *value_out_grad = config.GetValueOutGrad(); ComputeSeparatedQKVMatmulBackward(ctx, config, query, diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc index 1f9cbf320fb50..013593176aa2d 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class FusedGemmEpilogueOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu index e5bab3cae4fab..05beddc52211b 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu @@ -24,8 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FusedGemmEpilogueKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc index b1707ff55950d..687ce97068a35 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc @@ -22,8 +22,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class FusedGemmEpilogueXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc index 3a9bd15c101e9..e1be5afa0bd68 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { private: static constexpr const char *OpName = "FusedMultiTransformerINT8Op"; @@ -176,7 +174,7 @@ class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "TimeStep") { VLOG(10) << "var_name:" << var_name << " need not to transform"; diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu index fa22ee8d57e65..a4c11b85b9eeb 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu @@ -62,7 +62,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { auto ln_compute = AttnLayerNorm(dev_ctx, epsilon, bsz_seq, dim_embed); - Tensor ln_mean, ln_var; + phi::DenseTensor ln_mean, ln_var; ln_mean.Resize({{bsz_seq}}); auto *ln_mean_data = dev_ctx.Alloc(&ln_mean, ln_mean.numel() * sizeof(U)); @@ -86,7 +86,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // (transA, transB, compute_bias) = (false, trans_qkvw, false) AttnMatmulINT8 qkv_compute( dev_ctx, bsz_seq, output_size, input_size, compute_bias); - Tensor qkv_out; + phi::DenseTensor qkv_out; qkv_out.Resize({{bsz, seq_len, 3, num_head, dim_head}}); auto *qkv_out_data = dev_ctx.Alloc(&qkv_out, qkv_out.numel() * sizeof(T)); @@ -123,7 +123,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { out_seq_len += time_step_value; } - Tensor transpose_out_2, qk_out; + phi::DenseTensor transpose_out_2, qk_out; transpose_out_2.Resize({{3, bsz, num_head, seq_len, dim_head}}); auto *transpose_out_2_data = dev_ctx.Alloc(&transpose_out_2, transpose_out_2.numel() * sizeof(T)); @@ -131,9 +131,9 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { qk_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *qk_out_data = dev_ctx.Alloc(&qk_out, qk_out.numel() * sizeof(T)); - Tensor softmax_out; - Tensor attn_dropout_mask_out, attn_dropout_out; - Tensor qktv_out, fmha_out; + phi::DenseTensor softmax_out; + phi::DenseTensor attn_dropout_mask_out, attn_dropout_out; + phi::DenseTensor qktv_out, fmha_out; softmax_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *softmax_out_data = dev_ctx.Alloc(&softmax_out, softmax_out.numel() * sizeof(T)); @@ -170,7 +170,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, dropout_param2, epsilon); auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); - Tensor bias_dropout_residual_out, dropout_mask_out; + phi::DenseTensor 
bias_dropout_residual_out, dropout_mask_out; T *bias_dropout_residual_out_data = nullptr; if (pre_layer_norm) { bias_dropout_residual_out.Resize({{bsz, seq_len, dim_embed}}); @@ -190,7 +190,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { int dim_ffn = ffn1_weight_dim[0]; AttnMatmulINT8 ffn1_linear_compute( dev_ctx, bsz_seq, dim_ffn, dim_embed, false); - Tensor ffn1_out; + phi::DenseTensor ffn1_out; ffn1_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_out_data = dev_ctx.Alloc(&ffn1_out, ffn1_out.numel() * sizeof(T)); @@ -201,7 +201,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_ffn, ffn1_dropout_param); FusedDropoutHelper fused_act_dropout_helper_for_post_layernorm( dev_ctx, bsz_seq, dim_ffn, ffn1_dropout_param); - Tensor ffn1_dropout_out, ffn1_dropout_mask; + phi::DenseTensor ffn1_dropout_out, ffn1_dropout_mask; ffn1_dropout_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_dropout_out_data = dev_ctx.Alloc( &ffn1_dropout_out, ffn1_dropout_out.numel() * sizeof(T)); @@ -228,7 +228,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, ffn2_dropout_param, epsilon); // []. init workspace for cublasLt transform - Tensor input_workspace, output_workspace, cublaslt_workspace; + phi::DenseTensor input_workspace, output_workspace, cublaslt_workspace; // for input and output transform data is CUBLASLT_ORDER_COL32 format, int m_max = bsz_seq, k_max = std::max(dim_embed, dim_ffn), n_max = std::max({output_size, dim_embed, dim_ffn}); @@ -248,15 +248,15 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // calc auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); - Tensor *from_tensor = out; - Tensor tmp_out; + phi::DenseTensor *from_tensor = out; + phi::DenseTensor tmp_out; tmp_out.Resize({{bsz, seq_len, dim_embed}}); auto *tmp_out_data = dev_ctx.Alloc(&tmp_out, tmp_out.numel() * sizeof(T)); auto *x_data = input_x->data(); - Tensor *buf0 = nullptr; - Tensor *buf1 = nullptr; + phi::DenseTensor *buf0 = nullptr; + phi::DenseTensor *buf1 = nullptr; // step0: x --> buf1 // step1: buf1 --> buf0 @@ -293,9 +293,10 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { #endif // step2. qkv - const Tensor *qkv_bias = qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; + const phi::DenseTensor *qkv_bias = + qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; // NOTE: in decoder stage, bias is fused in fmha - const Tensor *bias = time_step ? nullptr : qkv_bias; + const phi::DenseTensor *bias = time_step ? nullptr : qkv_bias; if (!pre_layer_norm && i == 0) { qkv_compute.ComputeForward(qkv_weights[i], input_x, @@ -337,8 +338,9 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { #endif // step3. fmha - const Tensor *cache_kv = cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; - Tensor *cache_kv_out = cache_kv ? cache_kv_outs[i] : nullptr; + const phi::DenseTensor *cache_kv = + cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; + phi::DenseTensor *cache_kv_out = cache_kv ? 
cache_kv_outs[i] : nullptr; if (time_step) { // generation decoder stage // [2, batch_size, num_head, max_seq_len, head_size] diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index 94a89338a6205..92b782c44c77a 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusedMultiTransformerOp : public framework::OperatorWithKernel { private: static constexpr const char *OpName = "FusedMultiTransformerOp"; @@ -143,7 +141,7 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "TimeStep") { VLOG(10) << "var_name:" << var_name << " need not to transform"; diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index aeb00a7947cd6..5ca66cb132b05 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -40,7 +40,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { auto ln_biases = ctx.MultiInput("LnBias"); auto ln_compute = AttnLayerNorm(dev_ctx, epsilon, bsz_seq, dim_embed); - Tensor ln_mean, ln_var; + phi::DenseTensor ln_mean, ln_var; ln_mean.Resize({{bsz_seq}}); auto *ln_mean_data = dev_ctx.Alloc(&ln_mean, ln_mean.numel() * sizeof(U)); @@ -72,7 +72,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { input_size, /*compute_bias=*/false); - Tensor qkv_out; + phi::DenseTensor qkv_out; qkv_out.Resize({{bsz, seq_len, 3, num_head, dim_head}}); auto *qkv_out_data = dev_ctx.Alloc(&qkv_out, qkv_out.numel() * sizeof(T)); @@ -116,7 +116,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { out_seq_len += cache_offset; } - Tensor q_transpose_out, kv_transpose_out, qk_out; + phi::DenseTensor q_transpose_out, kv_transpose_out, qk_out; q_transpose_out.Resize({{bsz, num_head, seq_len, dim_head}}); auto *q_transpose_out_data = dev_ctx.Alloc(&q_transpose_out, q_transpose_out.numel() * sizeof(T)); @@ -128,7 +128,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { qk_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *qk_out_data = dev_ctx.Alloc(&qk_out, qk_out.numel() * sizeof(T)); - Tensor src_mask_out; + phi::DenseTensor src_mask_out; if (cache_offset > 0) { src_mask_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *src_mask_out_data = @@ -136,7 +136,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { } // [2, bs, num_head, cache_seq_len + seq_len, head_dim] - Tensor pre_cache_kv_out; + phi::DenseTensor pre_cache_kv_out; if (cache_offset > 0) { pre_cache_kv_out.Resize( {{2, bsz, num_head, seq_len + cache_offset, dim_head}}); @@ -144,9 +144,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &pre_cache_kv_out, pre_cache_kv_out.numel() * sizeof(T)); } - Tensor softmax_out; - Tensor attn_dropout_mask_out, attn_dropout_out; - Tensor qktv_out, fmha_out; + phi::DenseTensor softmax_out; + phi::DenseTensor attn_dropout_mask_out, attn_dropout_out; + phi::DenseTensor qktv_out, fmha_out; 
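
The kernel-side hunks above all apply one mechanical change: the file-local alias `using Tensor = phi::DenseTensor;` is removed, and every remaining use of `Tensor` (local temporaries, pointer declarations, and the template arguments of `ctx.Input<...>()` / `ctx.Output<...>()`) is spelled out as `phi::DenseTensor`. Below is a minimal, self-contained C++ sketch of that before/after shape; `StubContext` and the `phi::DenseTensor` defined here are illustrative stand-ins, not the real Paddle classes.

// Stand-in types: just enough structure to show the alias-removal pattern.
#include <string>

namespace phi {
class DenseTensor {};  // placeholder for the real phi::DenseTensor
}  // namespace phi

// Mimics the Input<T>()/Output<T>() accessors of framework::ExecutionContext.
class StubContext {
 public:
  template <typename T>
  const T* Input(const std::string& /*name*/) const { return &tensor_; }
  template <typename T>
  T* Output(const std::string& /*name*/) { return &tensor_; }

 private:
  phi::DenseTensor tensor_;
};

// Before the patch, kernels wrote:
//   using Tensor = phi::DenseTensor;
//   const Tensor* x = ctx.Input<Tensor>("X");
//   Tensor* out = ctx.Output<Tensor>("Out");
// After the patch, the alias is gone and the type is written out in full:
void ComputeSketch(StubContext& ctx) {
  const phi::DenseTensor* x = ctx.Input<phi::DenseTensor>("X");
  phi::DenseTensor* out = ctx.Output<phi::DenseTensor>("Out");
  phi::DenseTensor workspace;  // local temporaries change the same way
  (void)x;
  (void)out;
  (void)workspace;
}

int main() {
  StubContext ctx;
  ComputeSketch(ctx);
  return 0;
}
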
softmax_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *softmax_out_data = dev_ctx.Alloc(&softmax_out, softmax_out.numel() * sizeof(T)); @@ -179,7 +179,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, dropout_param2, epsilon); auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); - Tensor bias_dropout_residual_out, dropout_mask_out; + phi::DenseTensor bias_dropout_residual_out, dropout_mask_out; T *bias_dropout_residual_out_data = nullptr; if (pre_layer_norm) { bias_dropout_residual_out.Resize({{bsz, seq_len, dim_embed}}); @@ -202,7 +202,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { const phi::DDim ffn1_input_shape({bsz_seq, dim_embed}); ffn1_cublas_linear.Setup(ffn1_input_shape, ffn1_weight_dim, false, false); - Tensor ffn1_out; + phi::DenseTensor ffn1_out; ffn1_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_out_data = dev_ctx.Alloc(&ffn1_out, ffn1_out.numel() * sizeof(T)); @@ -223,15 +223,15 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { // calc auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); - Tensor *from_tensor = out; - Tensor tmp_out; + phi::DenseTensor *from_tensor = out; + phi::DenseTensor tmp_out; tmp_out.Resize({{bsz, seq_len, dim_embed}}); auto *tmp_out_data = dev_ctx.Alloc(&tmp_out, tmp_out.numel() * sizeof(T)); auto *x_data = input_x->data(); - Tensor *buf0 = nullptr; - Tensor *buf1 = nullptr; + phi::DenseTensor *buf0 = nullptr; + phi::DenseTensor *buf1 = nullptr; // step0: x --> buf1 // step1: buf1 --> buf0 @@ -270,9 +270,10 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { #endif // step2. qkv - const Tensor *qkv_bias = qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; + const phi::DenseTensor *qkv_bias = + qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; // NOTE: in decoder stage, bias is fused in fmha - const Tensor *bias = time_step ? nullptr : qkv_bias; + const phi::DenseTensor *bias = time_step ? nullptr : qkv_bias; if (!pre_layer_norm && i == 0) { qkv_compute.ComputeForward( qkv_weights[i], input_x, bias, &qkv_out, &qkv_out); @@ -285,8 +286,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { #endif // step3. fmha - const Tensor *cache_kv = cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; - Tensor *cache_kv_out = cache_kv ? cache_kv_outs[i] : nullptr; + const phi::DenseTensor *cache_kv = + cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; + phi::DenseTensor *cache_kv_out = cache_kv ? cache_kv_outs[i] : nullptr; if (time_step) { // generation decoder stage // [2, batch_size, num_head, max_seq_len, head_size] @@ -304,11 +306,12 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { time_step->data()[0], 1. / sqrt(dim_head)); } else if (cache_kv_out) { // generation context stage - const Tensor *pre_cache_kv_tensor = + const phi::DenseTensor *pre_cache_kv_tensor = pre_caches.size() > 0 ? pre_caches[i] : nullptr; - Tensor *pre_cache_kv_out_tmp = + phi::DenseTensor *pre_cache_kv_out_tmp = cache_offset > 0 ? &pre_cache_kv_out : nullptr; - Tensor *src_mask_tmp = cache_offset > 0 ? &src_mask_out : nullptr; + phi::DenseTensor *src_mask_tmp = + cache_offset > 0 ? 
&src_mask_out : nullptr; qkv_bias_add_transpose_split(dev_ctx, q_transpose_out_data, kv_transpose_out_data, @@ -554,7 +557,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { auto ln_biases = ctx.MultiInput("LnBias"); auto ln_compute = AttnLayerNorm(dev_ctx, epsilon, bsz_seq, dim_embed); - Tensor ln_mean, ln_var; + phi::DenseTensor ln_mean, ln_var; ln_mean.Resize({{bsz_seq}}); auto *ln_mean_data = dev_ctx.Alloc(&ln_mean, ln_mean.numel() * sizeof(U)); @@ -586,7 +589,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { input_size, /*compute_bias=*/false); - Tensor qkv_out; + phi::DenseTensor qkv_out; qkv_out.Resize({{bsz, seq_len, 3, num_head, dim_head}}); auto *qkv_out_data = dev_ctx.Alloc(&qkv_out, qkv_out.numel() * sizeof(T)); @@ -630,7 +633,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { out_seq_len += cache_offset; } - Tensor q_transpose_out, kv_transpose_out, qk_out; + phi::DenseTensor q_transpose_out, kv_transpose_out, qk_out; q_transpose_out.Resize({{bsz, num_head, seq_len, dim_head}}); auto *q_transpose_out_data = dev_ctx.Alloc(&q_transpose_out, q_transpose_out.numel() * sizeof(T)); @@ -642,7 +645,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { qk_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *qk_out_data = dev_ctx.Alloc(&qk_out, qk_out.numel() * sizeof(T)); - Tensor src_mask_out; + phi::DenseTensor src_mask_out; if (cache_offset > 0) { src_mask_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *src_mask_out_data = @@ -650,7 +653,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { } // [2, bs, num_head, cache_seq_len + seq_len, head_dim] - Tensor pre_cache_kv_out; + phi::DenseTensor pre_cache_kv_out; if (cache_offset > 0) { pre_cache_kv_out.Resize( {{2, bsz, num_head, seq_len + cache_offset, dim_head}}); @@ -658,9 +661,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &pre_cache_kv_out, pre_cache_kv_out.numel() * sizeof(T)); } - Tensor softmax_out; - Tensor attn_dropout_mask_out, attn_dropout_out; - Tensor qktv_out, fmha_out; + phi::DenseTensor softmax_out; + phi::DenseTensor attn_dropout_mask_out, attn_dropout_out; + phi::DenseTensor qktv_out, fmha_out; softmax_out.Resize({{bsz, num_head, seq_len, out_seq_len}}); auto *softmax_out_data = dev_ctx.Alloc(&softmax_out, softmax_out.numel() * sizeof(T)); @@ -693,7 +696,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, dropout_param2, epsilon); auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); - Tensor bias_dropout_residual_out, dropout_mask_out; + phi::DenseTensor bias_dropout_residual_out, dropout_mask_out; T *bias_dropout_residual_out_data = nullptr; if (pre_layer_norm) { bias_dropout_residual_out.Resize({{bsz, seq_len, dim_embed}}); @@ -713,7 +716,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { int dim_ffn = ffn1_weight_dim[1]; auto ffn1_linear_compute = AttnMatMul( dev_ctx, false, false, bsz_seq, dim_ffn, dim_embed, false); - Tensor ffn1_out; + phi::DenseTensor ffn1_out; ffn1_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_out_data = dev_ctx.Alloc(&ffn1_out, ffn1_out.numel() * sizeof(T)); @@ -722,7 +725,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { DropoutParam ffn1_dropout_param(true, 0, true, true, 0.0, nullptr, 0); FusedDropoutHelper fused_act_dropout_helper( dev_ctx, bsz_seq, dim_ffn, ffn1_dropout_param); - Tensor ffn1_dropout_out, 
ffn1_dropout_mask; + phi::DenseTensor ffn1_dropout_out, ffn1_dropout_mask; ffn1_dropout_out.Resize({{bsz_seq, dim_ffn}}); auto *ffn1_dropout_out_data = dev_ctx.Alloc( &ffn1_dropout_out, ffn1_dropout_out.numel() * sizeof(T)); @@ -744,15 +747,15 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { // calc auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); - Tensor *from_tensor = out; - Tensor tmp_out; + phi::DenseTensor *from_tensor = out; + phi::DenseTensor tmp_out; tmp_out.Resize({{bsz, seq_len, dim_embed}}); auto *tmp_out_data = dev_ctx.Alloc(&tmp_out, tmp_out.numel() * sizeof(T)); auto *x_data = input_x->data(); - Tensor *buf0 = nullptr; - Tensor *buf1 = nullptr; + phi::DenseTensor *buf0 = nullptr; + phi::DenseTensor *buf1 = nullptr; // step0: x --> buf1 // step1: buf1 --> buf0 @@ -791,9 +794,10 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { #endif // step2. qkv - const Tensor *qkv_bias = qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; + const phi::DenseTensor *qkv_bias = + qkv_biases.size() > 0 ? qkv_biases[i] : nullptr; // NOTE: in decoder stage, bias is fused in fmha - const Tensor *bias = time_step ? nullptr : qkv_bias; + const phi::DenseTensor *bias = time_step ? nullptr : qkv_bias; if (!pre_layer_norm && i == 0) { qkv_compute.ComputeForward( qkv_weights[i], input_x, bias, &qkv_out, &qkv_out); @@ -806,8 +810,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { #endif // step3. fmha - const Tensor *cache_kv = cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; - Tensor *cache_kv_out = cache_kv ? cache_kv_outs[i] : nullptr; + const phi::DenseTensor *cache_kv = + cache_kvs.size() > 0 ? cache_kvs[i] : nullptr; + phi::DenseTensor *cache_kv_out = cache_kv ? cache_kv_outs[i] : nullptr; if (time_step) { // generation decoder stage // [2, batch_size, num_head, max_seq_len, head_size] @@ -825,11 +830,12 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { time_step->data()[0], 1. / sqrt(dim_head)); } else if (cache_kv_out) { // generation context stage - const Tensor *pre_cache_kv_tensor = + const phi::DenseTensor *pre_cache_kv_tensor = pre_caches.size() > 0 ? pre_caches[i] : nullptr; - Tensor *pre_cache_kv_out_tmp = + phi::DenseTensor *pre_cache_kv_out_tmp = cache_offset > 0 ? &pre_cache_kv_out : nullptr; - Tensor *src_mask_tmp = cache_offset > 0 ? &src_mask_out : nullptr; + phi::DenseTensor *src_mask_tmp = + cache_offset > 0 ? 
&src_mask_out : nullptr; qkv_bias_add_transpose_split(dev_ctx, q_transpose_out_data, kv_transpose_out_data, diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h index 69ac06206c62b..0500f76110f33 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h @@ -44,8 +44,6 @@ DECLARE_bool(gemm_use_half_precision_compute_type); namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - // for debug // #define _DEBUG_FUSED_MULTI_TRANSFORMER @@ -1119,11 +1117,11 @@ void fmha_launch_kernel(const Masked_multihead_attention_params ¶ms, template void fmha(const phi::GPUContext &dev_ctx, - const Tensor &qkv_tensor, - const Tensor &qkv_bias_tensor, - const Tensor &src_mask_tensor, - Tensor *cache_kv_tensor, - Tensor *out_tensor, + const phi::DenseTensor &qkv_tensor, + const phi::DenseTensor &qkv_bias_tensor, + const phi::DenseTensor &src_mask_tensor, + phi::DenseTensor *cache_kv_tensor, + phi::DenseTensor *out_tensor, int batch_size, int max_seq_length, int num_head, diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 07cfb44a312bc..519ce1c6aca08 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -20,7 +20,6 @@ namespace paddle { namespace operators { #if CUDNN_VERSION >= 7100 -using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; diff --git a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc index 814631bd87b47..fc7804f9c4e8c 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.cc +++ b/paddle/fluid/operators/fused/fusion_gru_op.cc @@ -160,26 +160,29 @@ void FusionGRUOpMaker::Make() { "variable-time length input sequence. The underlying tensor in " "this phi::DenseTensor is a matrix with shape (T X M), where T is the " "total time steps in this mini-batch, M is the dim size of x."); - AddInput("H0", - "(Tensor, optional) The initial hidden state is an optional " - "input. This is a tensor with shape (N x D), where N is the " - "batch size, D is the hidden size.") + AddInput( + "H0", + "(phi::DenseTensor, optional) The initial hidden state is an optional " + "input. This is a tensor with shape (N x D), where N is the " + "batch size, D is the hidden size.") .AsDispensable(); AddInput("WeightX", - "(Tensor) The FC weight with shape (M x 3D)," + "(phi::DenseTensor) The FC weight with shape (M x 3D)," "where M is the dim size of x, D is the hidden size. "); - AddInput("WeightH", - "(Tensor) (D x 3D) Same as GRUOp, where D is the hidden size. " - "This weight is not exactly D x 3D as: {W_update, W_reset, W_state}" - "Acutally they are D x 2D and D x D two part weights." - "{W_update, W_reset; W_state}" - "{D x (D + D); D x D}"); + AddInput( + "WeightH", + "(phi::DenseTensor) (D x 3D) Same as GRUOp, where D is the hidden size. " + "This weight is not exactly D x 3D as: {W_update, W_reset, W_state}" + "Acutally they are D x 2D and D x D two part weights." + "{W_update, W_reset; W_state}" + "{D x (D + D); D x D}"); AddInput("Bias", - "(Tensor, optional) (1 x 3D)." + "(phi::DenseTensor, optional) (1 x 3D)." "Almost same as GRUOp." 
"Note: if have FC bias it should be added on this bias.") .AsDispensable(); - AddOutput("ReorderedH0", "(Tensor) (N x D), which N is the min-batch size.") + AddOutput("ReorderedH0", + "(phi::DenseTensor) (N x D), which N is the min-batch size.") .AsIntermediate(); AddOutput("XX", "(phi::DenseTensor) the result after X * WeightX (size is T x 3D)" diff --git a/paddle/fluid/operators/fused/fusion_gru_op.h b/paddle/fluid/operators/fused/fusion_gru_op.h index 4df5042089053..94bf38068d0dd 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.h +++ b/paddle/fluid/operators/fused/fusion_gru_op.h @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionGRUOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc index b612d590ea1d9..c526fdc18428c 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc @@ -184,16 +184,17 @@ void FusionLSTMOpMaker::Make() { "this phi::DenseTensor is a matrix with shape (T X M), where T is the " "total time steps in this mini-batch, M is the dim size of x."); AddInput("WeightX", - "(Tensor) the learnable weights of X." + "(phi::DenseTensor) the learnable weights of X." " - The shape is (M x 4D), where M is the dim size of x, D is the " "hidden size. " " - Weight = {W_cx, W_ix, W_fx, W_ox}"); - AddInput("WeightH", - "(Tensor) same as LSTMOp, the learnable hidden-hidden weights." - " - The shape is (D x 4D), where D is the hidden size. " - " - Weight = {W_ch, W_ih, W_fh, W_oh}"); + AddInput( + "WeightH", + "(phi::DenseTensor) same as LSTMOp, the learnable hidden-hidden weights." + " - The shape is (D x 4D), where D is the hidden size. " + " - Weight = {W_ch, W_ih, W_fh, W_oh}"); AddInput("Bias", - "(Tensor) the learnable weights. Almost same as LSTMOp" + "(phi::DenseTensor) the learnable weights. Almost same as LSTMOp" "Note: we should add the fc bias into this (1x4D) in bias." "input-hidden bias weight and peephole connections weight if " "setting `use_peepholes` True. " @@ -204,13 +205,15 @@ void FusionLSTMOpMaker::Make() { " - The shape is (1 x 7D). " " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}."); AddInput("H0", - "(Tensor, optional) (same as LSTMOp) the initial hidden state is an " + "(phi::DenseTensor, optional) (same as LSTMOp) the initial hidden " + "state is an " "optional " "input. This is a tensor with shape (N x D), where N is the " "batch size and D is the hidden size.") .AsDispensable(); AddInput("C0", - "(Tensor, optional) (same as LSTMOp) (the initial cell state is an " + "(phi::DenseTensor, optional) (same as LSTMOp) (the initial cell " + "state is an " "optional " "input. This is a tensor with shape (N x D), where N is the " "batch size. 
`H0` and `C0` can be NULL but only at the same time.") @@ -234,7 +237,7 @@ void FusionLSTMOpMaker::Make() { AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate(); AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate(); AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate(); - AddOutput("CheckedCell", "(Tensor) (2 x D) only for peephole.") + AddOutput("CheckedCell", "(phi::DenseTensor) (2 x D) only for peephole.") .AsIntermediate(); AddAttr("use_peepholes", "(bool, default: True) " diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.h b/paddle/fluid/operators/fused/fusion_lstm_op.h index 590d4bd7c2914..93f8eb981bbd9 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.h +++ b/paddle/fluid/operators/fused/fusion_lstm_op.h @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionLSTMOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc index 7bad7c78edc75..bab06f55be856 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc @@ -107,10 +107,12 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType( void FusionRepeatedFCReluOpMaker::Make() { AddInput("X", "(phi::DenseTensor) Input tensors of this operator."); - AddInput("W", "(Tensor) The weight tensors of this operator.").AsDuplicable(); - AddInput("Bias", "(Tensor) The bias tensors of this operator.") + AddInput("W", "(phi::DenseTensor) The weight tensors of this operator.") .AsDuplicable(); - AddOutput("ReluOut", "(Tensor) The output tensor of each relu operator.") + AddInput("Bias", "(phi::DenseTensor) The bias tensors of this operator.") + .AsDuplicable(); + AddOutput("ReluOut", + "(phi::DenseTensor) The output tensor of each relu operator.") .AsDuplicable() .AsIntermediate(); AddOutput("Out", "(phi::DenseTensor) Output tensor of this operator."); diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h index 2cfb404913c42..16025bf5181b6 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionRepeatedFCReluOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc index cb08e4fbff258..c9166919636bf 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc @@ -102,14 +102,16 @@ void FusionSeqConvEltAddReluOpMaker::Make() { "this phi::DenseTensor is a matrix with shape (T X M), where T is the " "total time steps in this mini-batch, M is the dim size of x."); // PaddingData only support false yet, should be ensured at pass. - AddInput("Filter", - "(Tensor) same as the input(Filter) of sequence conv op is an " - "learnable parameter." 
- "This is a tensor with shape (K, N), where K is the " - "context_length * dim size of x, N is the output feature size."); - AddInput("Bias", - "(Tensor) the learnable weights. shape (1, N), where N is the " - "output feature size"); + AddInput( + "Filter", + "(phi::DenseTensor) same as the input(Filter) of sequence conv op is an " + "learnable parameter." + "This is a tensor with shape (K, N), where K is the " + "context_length * dim size of x, N is the output feature size."); + AddInput( + "Bias", + "(phi::DenseTensor) the learnable weights. shape (1, N), where N is the " + "output feature size"); AddOutput( "Out", "(phi::DenseTensor) the output(Out) is a LodTensor, which support " @@ -117,7 +119,7 @@ void FusionSeqConvEltAddReluOpMaker::Make() { "this phi::DenseTensor is a matrix with shape (T, N), where, T is the " "total time steps in this mini-batch, N is the output feature size."); AddOutput("ColMat", - "(Tensor) (T, K), where T is where T is the " + "(phi::DenseTensor) (T, K), where T is where T is the " "total time steps in this mini-batch, K is height of Filter") .AsIntermediate(); AddAttr("contextLength", diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h index d1b7ae835821f..96f231f9a3cd5 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc index bcc8ee894543f..df4cbba1dec15 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc @@ -114,12 +114,13 @@ void FusionSeqExpandConcatFCOpMaker::Make() { "ref lod " "for sequence expand, and the rest input should have same lod.") .AsDuplicable(); - AddInput("FCWeight", "(Tensor) the weights of fc."); - AddInput("FCBias", "(Tensor, optional) the bias of fc.").AsDispensable(); + AddInput("FCWeight", "(phi::DenseTensor) the weights of fc."); + AddInput("FCBias", "(phi::DenseTensor, optional) the bias of fc.") + .AsDispensable(); AddOutput("Out", "(phi::DenseTensor) Output LodTensor."); AddOutput( "FCOut", - "(Tensor) the intermediate tensor to keep the result of fc." + "(phi::DenseTensor) the intermediate tensor to keep the result of fc." "Shape is (N x D), where N is the batch size, D is the output dim of fc") .AsIntermediate(); AddAttr("fc_activation", diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h index 9c611025351e8..495de5f233445 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h index 6dc29b23cbb89..2e2d6e07dc7e5 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionSeqPoolConcatOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index 41944f4bc095f..e3953f9e6abc0 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -77,7 +77,8 @@ void FusionSeqPoolCVMConcatOpMaker::Make() { AddInput("X", "(phi::DenseTensor) Input tensors of this operator.") .AsDuplicable(); AddInput("CVM", - "(Tensor), a 2-D Tensor with shape [N x 2], where N is the batch " + "(phi::DenseTensor), a 2-D phi::DenseTensor with shape [N x 2], " + "where N is the batch " "size, 2 is show and click."); AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator."); AddAttr("pooltype", diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h index 24a02553044b0..b9d7d0dfc340e 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class FusionSeqPoolCVMConcatOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc index b7a01b7955887..8d7f792f3c25b 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc @@ -70,12 +70,12 @@ framework::OpKernelType FusionSquaredMatSubOp::GetExpectedKernelType( } void FusionSquaredMatSubOpMaker::Make() { - AddInput("X", "(Tensor) Input Mat A of this operator."); - AddInput("Y", "(Tensor) Input Mat B of this operator."); - AddOutput("SquaredX", "(Tensor) Squared X.").AsIntermediate(); - AddOutput("SquaredY", "(Tensor) Squared Y.").AsIntermediate(); - AddOutput("SquaredXY", "(Tensor) Squared X*Y.").AsIntermediate(); - AddOutput("Out", "(Tensor) Output tensor of concat operator."); + AddInput("X", "(phi::DenseTensor) Input Mat A of this operator."); + AddInput("Y", "(phi::DenseTensor) Input Mat B of this operator."); + AddOutput("SquaredX", "(phi::DenseTensor) Squared X.").AsIntermediate(); + AddOutput("SquaredY", "(phi::DenseTensor) Squared Y.").AsIntermediate(); + AddOutput("SquaredXY", "(phi::DenseTensor) Squared X*Y.").AsIntermediate(); + AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator."); AddAttr("scalar", "The scalar on output matrix.").SetDefault(1.f); AddComment(R"DOC( Fusion Squared Matrix and substrct operator. 
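
The OpMaker hunks in this part of the patch make the matching substitution inside operator documentation strings: each "(Tensor ...)" prefix in an AddInput/AddOutput description becomes "(phi::DenseTensor ...)", with the adjacent string literals re-wrapped so each source line stays within the formatter's length limit. The sketch below imitates only the builder surface of that pattern; StubOpMaker and its AddInput/AsDispensable methods are stand-ins that merely record strings, not Paddle's real proto maker API.

// Stand-in maker: records descriptions so the re-worded doc strings can be
// printed; it is not the real framework::OpProtoAndCheckerMaker.
#include <iostream>
#include <string>
#include <vector>

class StubOpMaker {
 public:
  StubOpMaker& AddInput(const std::string& name, const std::string& doc) {
    entries_.push_back(name + ": " + doc);
    return *this;
  }
  StubOpMaker& AsDispensable() {
    entries_.back() += " [dispensable]";
    return *this;
  }
  void Print() const {
    for (const auto& e : entries_) std::cout << e << "\n";
  }

 private:
  std::vector<std::string> entries_;
};

int main() {
  StubOpMaker maker;
  // Before: "(Tensor, optional) The initial hidden state is an optional input ..."
  // After the patch the prefix names the concrete type, and adjacent string
  // literals are split so each source line stays short:
  maker
      .AddInput("H0",
                "(phi::DenseTensor, optional) The initial hidden state is an "
                "optional input, shaped (N x D).")
      .AsDispensable();
  maker.Print();
  return 0;
}
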
diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h index 7707bb14fcefe..fc6a54fd9eb03 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - // ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar class FusionSquaredMatSubOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cu b/paddle/fluid/operators/fused/multihead_matmul_op.cu index 2e8b6f7d0a6b8..ba2b71ff6ffd7 100644 --- a/paddle/fluid/operators/fused/multihead_matmul_op.cu +++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu @@ -273,7 +273,6 @@ template class MultiHeadMatMulV2Kernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = phi::DenseTensor; auto *input = context.Input("Input"); auto *w = context.Input("W"); auto *bias = context.Input("Bias"); @@ -296,7 +295,7 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { int batch = input_dims[0]; int seq_len = input_dims[1]; int hidden = input_dims[2]; - Tensor temp_bias_tensor; + phi::DenseTensor temp_bias_tensor; // if bias_qk is[batch, 1, 1, seq_len], the bias_qk_d need to be broadcasted if (bias_qk && bias_qk->numel() == (batch * seq_len)) { VLOG(4) << "Do broadcasted bias_qk from [batch, 1, 1, seq_len]"; @@ -343,13 +342,13 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { device_ctx.template Alloc(out, out->numel() * sizeof(T)); // (B*S, hidden) - const Tensor input_matrix = + const phi::DenseTensor input_matrix = framework::ReshapeToMatrix(*input, 2 /*x_num_col_dims */); // (hidden, 3 * all_head_size) - const Tensor w_matrix = + const phi::DenseTensor w_matrix = framework::ReshapeToMatrix(*w, 1 /*y_num_col_dims*/); - Tensor temp_out_tensor; + phi::DenseTensor temp_out_tensor; auto temp_out_dims = phi::make_ddim({batch, seq_len, 3, head_number, head_size}); temp_out_tensor.Resize( @@ -364,7 +363,7 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { VLOG(2) << temp_out_tensor; // temp_out_tensor.Resize(temp_out_dims); - Tensor multihead_temp_tensor; + phi::DenseTensor multihead_temp_tensor; // B * head_number * S * S * 1 + B * S * 3 * N * H int scratch_size = batch * head_number * seq_len * seq_len * 1; multihead_temp_tensor.Resize({scratch_size + temp_out_tensor.numel()}); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc index 76f173c2d6d09..b449ca3bbe8da 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class ResNetBasicBlockOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 8310116849611..f6b2d30453f42 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -21,8 +21,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class ResnetBasicBlockAttr { public: explicit ResnetBasicBlockAttr(const framework::ExecutionContext& ctx) { diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index b2d44057365b9..4b46dc76b260e 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - // Shape of bitmask static framework::DDim GetBitmaskDims(std::vector out_shape) { int c = out_shape.back(); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu index 02bde0ef04ff2..446d289a1b959 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ b/paddle/fluid/operators/fused/resnet_unit_op.cu @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ResNetUnitKernel : public framework::OpKernel { public: @@ -39,20 +37,23 @@ class ResNetUnitKernel : public framework::OpKernel { "ResNetUnitOp only supports float16 for now.")); // input x - const Tensor *input_x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *bias_x = ctx.Input("BiasX"); + const phi::DenseTensor *input_x = ctx.Input("X"); + const phi::DenseTensor *filter_x = ctx.Input("FilterX"); + const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); + const phi::DenseTensor *bias_x = ctx.Input("BiasX"); // norm conv - Tensor *conv_out_x = ctx.Output("ConvX"); + phi::DenseTensor *conv_out_x = ctx.Output("ConvX"); // bn finalize - Tensor *saved_mean_x = ctx.Output("SavedMeanX"); - Tensor *saved_invstd_x = ctx.Output("SavedInvstdX"); - Tensor *running_mean_x = ctx.Output("RunningMeanX"); - Tensor *running_var_x = ctx.Output("RunningVarX"); + phi::DenseTensor *saved_mean_x = ctx.Output("SavedMeanX"); + phi::DenseTensor *saved_invstd_x = + ctx.Output("SavedInvstdX"); + phi::DenseTensor *running_mean_x = + ctx.Output("RunningMeanX"); + phi::DenseTensor *running_var_x = + ctx.Output("RunningVarX"); // sbar - Tensor *output = ctx.Output("Y"); - Tensor *bitmask = ctx.Output("BitMask"); + phi::DenseTensor *output = ctx.Output("Y"); + phi::DenseTensor *bitmask = ctx.Output("BitMask"); // attrs int padding = ctx.Attr("padding"); int stride = ctx.Attr("stride"); @@ -93,8 +94,8 @@ class ResNetUnitKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); // 1. Conv - Tensor sum_x; - Tensor sum_of_squares_x; + phi::DenseTensor sum_x; + phi::DenseTensor sum_of_squares_x; sum_x.Resize(param_dims); sum_of_squares_x.Resize(param_dims); CudnnNormConvolution conv_x_op(dev_ctx, @@ -109,8 +110,8 @@ class ResNetUnitKernel : public framework::OpKernel { dev_ctx, *input_x, *filter_x, conv_out_x, &sum_x, &sum_of_squares_x); // 2. 
BN - Tensor equiv_scale_x; - Tensor equiv_bias_x; + phi::DenseTensor equiv_scale_x; + phi::DenseTensor equiv_bias_x; equiv_scale_x.Resize(param_dims); equiv_bias_x.Resize(param_dims); CudnnBNStatsFinalize bn_x_op(dev_ctx, param_shape); @@ -140,24 +141,28 @@ class ResNetUnitKernel : public framework::OpKernel { bitmask_shape); if (has_shortcut) { // input z - const Tensor *input_z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *bias_z = ctx.Input("BiasZ"); + const phi::DenseTensor *input_z = ctx.Input("Z"); + const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); + const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); + const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); // norm conv - Tensor *conv_out_z = ctx.Output("ConvZ"); + phi::DenseTensor *conv_out_z = ctx.Output("ConvZ"); // bn finalize - Tensor *saved_mean_z = ctx.Output("SavedMeanZ"); - Tensor *saved_invstd_z = ctx.Output("SavedInvstdZ"); - Tensor *running_mean_z = ctx.Output("RunningMeanZ"); - Tensor *running_var_z = ctx.Output("RunningVarZ"); + phi::DenseTensor *saved_mean_z = + ctx.Output("SavedMeanZ"); + phi::DenseTensor *saved_invstd_z = + ctx.Output("SavedInvstdZ"); + phi::DenseTensor *running_mean_z = + ctx.Output("RunningMeanZ"); + phi::DenseTensor *running_var_z = + ctx.Output("RunningVarZ"); auto input_z_shape = phi::vectorize(input_z->dims()); auto filter_z_shape = phi::vectorize(filter_z->dims()); // 3.1 Conv for second input - Tensor sum_z; - Tensor sum_of_squares_z; + phi::DenseTensor sum_z; + phi::DenseTensor sum_of_squares_z; sum_z.Resize(param_dims); sum_of_squares_z.Resize(param_dims); CudnnNormConvolution conv_z_op(dev_ctx, @@ -172,8 +177,8 @@ class ResNetUnitKernel : public framework::OpKernel { dev_ctx, *input_z, *filter_z, conv_out_z, &sum_z, &sum_of_squares_z); // 3.2 BN for second input - Tensor equiv_scale_z; - Tensor equiv_bias_z; + phi::DenseTensor equiv_scale_z; + phi::DenseTensor equiv_bias_z; equiv_scale_z.Resize(param_dims); equiv_bias_z.Resize(param_dims); CudnnBNStatsFinalize bn_z_op(dev_ctx, param_shape); @@ -203,7 +208,7 @@ class ResNetUnitKernel : public framework::OpKernel { output, bitmask); } else { - const Tensor *input_z = + const phi::DenseTensor *input_z = fuse_add ? 
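Across the ResNetUnit kernels only the declared pointer types change; the retrieval calls keep their template argument. A condensed sketch of that pattern, with the <phi::DenseTensor> arguments written out as assumed from the standard ExecutionContext API:

    // Inputs of the fused unit, declared with the concrete phi type.
    const phi::DenseTensor *input_x = ctx.Input<phi::DenseTensor>("X");
    const phi::DenseTensor *filter_x = ctx.Input<phi::DenseTensor>("FilterX");
    // Outputs, including the BN statistics produced by the fused kernel.
    phi::DenseTensor *conv_out_x = ctx.Output<phi::DenseTensor>("ConvX");
    phi::DenseTensor *saved_mean_x =
        ctx.Output<phi::DenseTensor>("SavedMeanX");
    phi::DenseTensor *output = ctx.Output<phi::DenseTensor>("Y");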
ctx.Input("Z") : nullptr; sbar_op.Forward(dev_ctx, *conv_out_x, @@ -231,26 +236,29 @@ class ResNetUnitGradKernel : public framework::OpKernel { platform::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); - const Tensor *y_grad = + const phi::DenseTensor *y_grad = ctx.Input(framework::GradVarName("Y")); - const Tensor *x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *bias_x = ctx.Input("BiasX"); - const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); - const Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); - - const Tensor *conv_out_x = ctx.Input("ConvX"); - const Tensor *output = ctx.Input("Y"); - const Tensor *bitmask = ctx.Input("BitMask"); - - Tensor *x_grad = ctx.Output(framework::GradVarName("X")); - Tensor *filter_x_grad = + const phi::DenseTensor *x = ctx.Input("X"); + const phi::DenseTensor *filter_x = ctx.Input("FilterX"); + const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); + const phi::DenseTensor *bias_x = ctx.Input("BiasX"); + const phi::DenseTensor *saved_mean_x = + ctx.Input("SavedMeanX"); + const phi::DenseTensor *saved_invstd_x = + ctx.Input("SavedInvstdX"); + + const phi::DenseTensor *conv_out_x = ctx.Input("ConvX"); + const phi::DenseTensor *output = ctx.Input("Y"); + const phi::DenseTensor *bitmask = ctx.Input("BitMask"); + + phi::DenseTensor *x_grad = + ctx.Output(framework::GradVarName("X")); + phi::DenseTensor *filter_x_grad = ctx.Output(framework::GradVarName("FilterX")); - Tensor *scale_x_grad = + phi::DenseTensor *scale_x_grad = ctx.Output(framework::GradVarName("ScaleX")); - Tensor *bias_x_grad = + phi::DenseTensor *bias_x_grad = ctx.Output(framework::GradVarName("BiasX")); int padding = ctx.Attr("padding"); @@ -276,7 +284,7 @@ class ResNetUnitGradKernel : public framework::OpKernel { // 1. 
Backward of BN (+ Add + Relu) for x, get conv_out_x_grad, // scale_x_grad, bias_x_grad - Tensor conv_out_x_grad; + phi::DenseTensor conv_out_x_grad; conv_out_x_grad.Resize(conv_out_x->dims()); CudnnScaleBiasAddRelu sbar_x_op(dev_ctx, act_type, @@ -295,27 +303,28 @@ class ResNetUnitGradKernel : public framework::OpKernel { // ScaleBiasAddRelu // | // Y - const Tensor *z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *bias_z = ctx.Input("BiasZ"); - const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); - const Tensor *saved_invstd_z = + const phi::DenseTensor *z = ctx.Input("Z"); + const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); + const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); + const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); + const phi::DenseTensor *saved_mean_z = + ctx.Input("SavedMeanZ"); + const phi::DenseTensor *saved_invstd_z = ctx.Input("SavedInvstdZ"); - const Tensor *conv_out_z = ctx.Input("ConvZ"); + const phi::DenseTensor *conv_out_z = ctx.Input("ConvZ"); - Tensor *z_grad = + phi::DenseTensor *z_grad = ctx.Output(framework::GradVarName("Z")); - Tensor *filter_z_grad = + phi::DenseTensor *filter_z_grad = ctx.Output(framework::GradVarName("FilterZ")); - Tensor *scale_z_grad = + phi::DenseTensor *scale_z_grad = ctx.Output(framework::GradVarName("ScaleZ")); - Tensor *bias_z_grad = + phi::DenseTensor *bias_z_grad = ctx.Output(framework::GradVarName("BiasZ")); // 1.1 Backward of BN + Add (+ Relu) for x, get conv_out_x_grad, // scale_x_grad, bias_x_grad and z_grad_temp - Tensor z_grad_temp; + phi::DenseTensor z_grad_temp; z_grad_temp.Resize(conv_out_z->dims()); sbar_x_op.Backward(dev_ctx, *y_grad, @@ -332,7 +341,7 @@ class ResNetUnitGradKernel : public framework::OpKernel { eps); // 1.2 bn backward for z, get conv_out_z_grad, dscale_z, dbias_z - Tensor conv_out_z_grad; + phi::DenseTensor conv_out_z_grad; conv_out_z_grad.Resize(conv_out_z->dims()); CudnnScaleBiasAddRelu sbar_z_op( dev_ctx, "", false, false, output_shape, param_shape, bitmask_shape); @@ -366,7 +375,7 @@ class ResNetUnitGradKernel : public framework::OpKernel { } else { // 1.1 Backward of BN (+ Add + Relu) for x, get conv_out_x_grad, // scale_x_grad, bias_x_grad (and z_grad) - Tensor *z_grad = + phi::DenseTensor *z_grad = fuse_add ? ctx.Output(framework::GradVarName("Z")) : nullptr; sbar_x_op.Backward(dev_ctx, diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc index 80986761c7cba..1e2741cde5d9e 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ResNetUnitXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; @@ -35,19 +33,22 @@ class ResNetUnitXPUKernel : public framework::OpKernel { bool is_nchw = (ctx.Attr("data_format") == "NCHW"); // input x - const Tensor *input_x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *bias_x = ctx.Input("BiasX"); + const phi::DenseTensor *input_x = ctx.Input("X"); + const phi::DenseTensor *filter_x = ctx.Input("FilterX"); + const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); + const phi::DenseTensor *bias_x = ctx.Input("BiasX"); // output x - Tensor *conv_out_x = ctx.Output("ConvX"); - Tensor *saved_mean_x = ctx.Output("SavedMeanX"); - Tensor *saved_invstd_x = ctx.Output("SavedInvstdX"); - Tensor *running_mean_x = ctx.Output("RunningMeanX"); - Tensor *running_var_x = ctx.Output("RunningVarX"); + phi::DenseTensor *conv_out_x = ctx.Output("ConvX"); + phi::DenseTensor *saved_mean_x = ctx.Output("SavedMeanX"); + phi::DenseTensor *saved_invstd_x = + ctx.Output("SavedInvstdX"); + phi::DenseTensor *running_mean_x = + ctx.Output("RunningMeanX"); + phi::DenseTensor *running_var_x = + ctx.Output("RunningVarX"); - Tensor *output = ctx.Output("Y"); + phi::DenseTensor *output = ctx.Output("Y"); // attrs int padding = ctx.Attr("padding"); @@ -101,16 +102,20 @@ class ResNetUnitXPUKernel : public framework::OpKernel { std::vector w_maxlist = {nullptr}; if (has_shortcut) { // input z - const Tensor *input_z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *bias_z = ctx.Input("BiasZ"); - - Tensor *conv_out_z = ctx.Output("ConvZ"); - Tensor *saved_mean_z = ctx.Output("SavedMeanZ"); - Tensor *saved_invstd_z = ctx.Output("SavedInvstdZ"); - Tensor *running_mean_z = ctx.Output("RunningMeanZ"); - Tensor *running_var_z = ctx.Output("RunningVarZ"); + const phi::DenseTensor *input_z = ctx.Input("Z"); + const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); + const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); + const phi::DenseTensor *bias_z = ctx.Input("BiasZ"); + + phi::DenseTensor *conv_out_z = ctx.Output("ConvZ"); + phi::DenseTensor *saved_mean_z = + ctx.Output("SavedMeanZ"); + phi::DenseTensor *saved_invstd_z = + ctx.Output("SavedInvstdZ"); + phi::DenseTensor *running_mean_z = + ctx.Output("RunningMeanZ"); + phi::DenseTensor *running_var_z = + ctx.Output("RunningVarZ"); x_list.push_back(reinterpret_cast(input_z->data())); w_list.push_back(reinterpret_cast(filter_z->data())); @@ -137,7 +142,7 @@ class ResNetUnitXPUKernel : public framework::OpKernel { w_maxlist.push_back(nullptr); } else { if (fuse_add) { - const Tensor *input_z = ctx.Input("Z"); + const phi::DenseTensor *input_z = ctx.Input("Z"); auto input_z_shape = phi::vectorize(input_z->dims()); x_list.push_back(reinterpret_cast(input_z->data())); x_shape_list.push_back(input_z_shape); @@ -189,22 +194,25 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { platform::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); - const Tensor *y_grad = + const phi::DenseTensor *y_grad = ctx.Input(framework::GradVarName("Y")); - const Tensor *x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); - const 
Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); - const Tensor *conv_out_x = ctx.Input("ConvX"); - const Tensor *output = ctx.Input("Y"); - - Tensor *x_grad = ctx.Output(framework::GradVarName("X")); - Tensor *filter_x_grad = + const phi::DenseTensor *x = ctx.Input("X"); + const phi::DenseTensor *filter_x = ctx.Input("FilterX"); + const phi::DenseTensor *scale_x = ctx.Input("ScaleX"); + const phi::DenseTensor *saved_mean_x = + ctx.Input("SavedMeanX"); + const phi::DenseTensor *saved_invstd_x = + ctx.Input("SavedInvstdX"); + const phi::DenseTensor *conv_out_x = ctx.Input("ConvX"); + const phi::DenseTensor *output = ctx.Input("Y"); + + phi::DenseTensor *x_grad = + ctx.Output(framework::GradVarName("X")); + phi::DenseTensor *filter_x_grad = ctx.Output(framework::GradVarName("FilterX")); - Tensor *scale_x_grad = + phi::DenseTensor *scale_x_grad = ctx.Output(framework::GradVarName("ScaleX")); - Tensor *bias_x_grad = + phi::DenseTensor *bias_x_grad = ctx.Output(framework::GradVarName("BiasX")); int padding = ctx.Attr("padding"); @@ -265,21 +273,22 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { // ScaleBiasAddRelu // | // Y - const Tensor *z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); - const Tensor *saved_invstd_z = + const phi::DenseTensor *z = ctx.Input("Z"); + const phi::DenseTensor *filter_z = ctx.Input("FilterZ"); + const phi::DenseTensor *scale_z = ctx.Input("ScaleZ"); + const phi::DenseTensor *saved_mean_z = + ctx.Input("SavedMeanZ"); + const phi::DenseTensor *saved_invstd_z = ctx.Input("SavedInvstdZ"); - const Tensor *conv_out_z = ctx.Input("ConvZ"); + const phi::DenseTensor *conv_out_z = ctx.Input("ConvZ"); - Tensor *z_grad = + phi::DenseTensor *z_grad = ctx.Output(framework::GradVarName("Z")); - Tensor *filter_z_grad = + phi::DenseTensor *filter_z_grad = ctx.Output(framework::GradVarName("FilterZ")); - Tensor *scale_z_grad = + phi::DenseTensor *scale_z_grad = ctx.Output(framework::GradVarName("ScaleZ")); - Tensor *bias_z_grad = + phi::DenseTensor *bias_z_grad = ctx.Output(framework::GradVarName("BiasZ")); x_list.push_back(reinterpret_cast(z->data())); w_list.push_back(reinterpret_cast(filter_z->data())); diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cu b/paddle/fluid/operators/fused/skip_layernorm_op.cu index 96646071567d5..f6fd97f918c07 100644 --- a/paddle/fluid/operators/fused/skip_layernorm_op.cu +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cu @@ -29,7 +29,6 @@ template class SkipLayerNormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = phi::DenseTensor; auto *X = context.Input("X"); auto *Y = context.Input("Y"); auto *scale = context.Input("Scale"); diff --git a/paddle/fluid/operators/fused/xpu_fused_common_function.h b/paddle/fluid/operators/fused/xpu_fused_common_function.h index 1a1ec8c47f9ba..63a22838e8c35 100644 --- a/paddle/fluid/operators/fused/xpu_fused_common_function.h +++ b/paddle/fluid/operators/fused/xpu_fused_common_function.h @@ -19,14 +19,13 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; struct XPUDropoutParam { float dropout_prob; bool is_upscale_in_train; bool is_test; bool fix_seed; - const Tensor *tensor_seed; + const phi::DenseTensor *tensor_seed; int seed_val; XPUDropoutParam() { @@ -61,8 +60,9 @@ struct XPUDropoutParam { str_seed = str_seed + "Seed"; } - tensor_seed = - 
context.HasInput(str_seed) ? context.Input(str_seed) : nullptr; + tensor_seed = context.HasInput(str_seed) + ? context.Input(str_seed) + : nullptr; if (tensor_seed) { seed_val = *(tensor_seed->data()); } else { @@ -74,7 +74,7 @@ struct XPUDropoutParam { bool is_upscale_in_train_, bool is_test_, bool fix_seed_, - const Tensor *tensor_seed, + const phi::DenseTensor *tensor_seed, int seed_val_) { dropout_prob = dropout_prob_; is_upscale_in_train = is_upscale_in_train_; @@ -108,8 +108,9 @@ struct XPUDropoutParam { } else { str_seed = str_seed + "Seed"; } - tensor_seed = - context.HasInput(str_seed) ? context.Input(str_seed) : nullptr; + tensor_seed = context.HasInput(str_seed) + ? context.Input(str_seed) + : nullptr; if (tensor_seed) { seed_val = *(tensor_seed->data()); diff --git a/paddle/fluid/operators/fused/yolo_box_head_op.cu b/paddle/fluid/operators/fused/yolo_box_head_op.cu index 696cab20db714..88d589f85b0ec 100644 --- a/paddle/fluid/operators/fused/yolo_box_head_op.cu +++ b/paddle/fluid/operators/fused/yolo_box_head_op.cu @@ -67,7 +67,6 @@ template class YoloBoxHeadKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - using Tensor = phi::DenseTensor; auto* x = context.Input("X"); auto* out = context.Output("Out"); auto anchors = context.Attr>("anchors"); diff --git a/paddle/fluid/operators/fused/yolo_box_post_op.cu b/paddle/fluid/operators/fused/yolo_box_post_op.cu index 072f0374c5b82..fc01d7027f31d 100644 --- a/paddle/fluid/operators/fused/yolo_box_post_op.cu +++ b/paddle/fluid/operators/fused/yolo_box_post_op.cu @@ -319,7 +319,6 @@ template class YoloBoxPostKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - using Tensor = phi::DenseTensor; // prepare inputs std::vector boxes_input(3); std::vector> boxes_input_dims(3); diff --git a/paddle/fluid/operators/gather_nd_op_mlu.cc b/paddle/fluid/operators/gather_nd_op_mlu.cc index b6c96e3c2edd5..93b20c86af860 100644 --- a/paddle/fluid/operators/gather_nd_op_mlu.cc +++ b/paddle/fluid/operators/gather_nd_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class GatherNdMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/gather_nd_op_npu.cc b/paddle/fluid/operators/gather_nd_op_npu.cc index 5cea840b4aec5..6629d369db0c6 100644 --- a/paddle/fluid/operators/gather_nd_op_npu.cc +++ b/paddle/fluid/operators/gather_nd_op_npu.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template diff --git a/paddle/fluid/operators/gather_scatter_kernel.cc b/paddle/fluid/operators/gather_scatter_kernel.cc index b579b3175d396..1c6b2e6c1a095 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cc +++ b/paddle/fluid/operators/gather_scatter_kernel.cc @@ -16,8 +16,6 @@ limitations under the License. 
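The XPUDropoutParam hunks keep the conditional seed lookup and only spell out the tensor type. A small sketch of the intended flow, assuming the usual ExecutionContext members, an int seed tensor, and `seed_val` being the struct member it feeds:

    // Prefer a seed fed as a tensor input; leave the attribute fallback otherwise.
    const phi::DenseTensor *tensor_seed =
        context.HasInput(str_seed)
            ? context.Input<phi::DenseTensor>(str_seed)
            : nullptr;
    if (tensor_seed != nullptr) {
      seed_val = *(tensor_seed->data<int>());
    }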
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class TensorAssign { public: template @@ -50,7 +48,7 @@ template struct cpu_gather_scatter_functor { template - void operator()(Tensor self, + void operator()(phi::DenseTensor self, int dim, const phi::DenseTensor& index, const phi::DenseTensor& src, @@ -130,10 +128,10 @@ struct cpu_gather_scatter_functor { }; template -void cpu_gather_kernel(Tensor self, +void cpu_gather_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor -void cpu_scatter_assign_kernel(Tensor self, +void cpu_scatter_assign_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor -void cpu_scatter_add_kernel(Tensor self, +void cpu_scatter_add_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor -void cpu_scatter_mul_kernel(Tensor self, +void cpu_scatter_mul_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor -void cpu_scatter_input_grad_kernel(Tensor self, +void cpu_scatter_input_grad_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor output, + phi::DenseTensor output, const platform::DeviceContext& ctx) { auto* index_data = index.data(); auto* output_data = output.data(); diff --git a/paddle/fluid/operators/gather_scatter_kernel.cu b/paddle/fluid/operators/gather_scatter_kernel.cu index 2f17b946c6149..1cb4e4a4e9d78 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cu +++ b/paddle/fluid/operators/gather_scatter_kernel.cu @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class TensorAssign { public: template @@ -107,10 +105,10 @@ template struct gpu_gather_scatter_functor { template - void operator()(Tensor self, + void operator()(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const std::string& method_name, const func_t& reduce_op, const platform::DeviceContext& ctx) { @@ -160,10 +158,10 @@ struct gpu_gather_scatter_functor { }; // struct gpu_gather_scatter_functor template -void gpu_gather_kernel(Tensor self, +void gpu_gather_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor -void gpu_scatter_assign_kernel(Tensor self, +void gpu_scatter_assign_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor -void gpu_scatter_add_kernel(Tensor self, +void gpu_scatter_add_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor -void gpu_scatter_mul_kernel(Tensor self, +void gpu_scatter_mul_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor -void gpu_scatter_input_grad_kernel(Tensor self, +void gpu_scatter_input_grad_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor grad, + phi::DenseTensor grad, const platform::DeviceContext& ctx) { auto* index_data = index.data(); auto* grad_data = grad.data(); diff --git a/paddle/fluid/operators/gather_scatter_kernel.h b/paddle/fluid/operators/gather_scatter_kernel.h index b97451b488b92..9cf3c3e33009a 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.h +++ b/paddle/fluid/operators/gather_scatter_kernel.h @@ -30,87 +30,85 @@ namespace operators { Instantiate_Template_Function_index_t(func, unsigned char) #define Instantiate_Template_Function_index_t(func, tensor_t) \ - template void func(Tensor input, \ + template void func(phi::DenseTensor input, \ int dim, \ const phi::DenseTensor& index, \ - Tensor result, \ + phi::DenseTensor result, \ const platform::DeviceContext& ctx); \ - template void func(Tensor input, \ + template void func(phi::DenseTensor input, \ int dim, \ const phi::DenseTensor& index, \ - Tensor result, \ + phi::DenseTensor result, \ const platform::DeviceContext& ctx); -using Tensor = phi::DenseTensor; - template -void cpu_gather_kernel(Tensor self, +void cpu_gather_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx); template -void cpu_scatter_assign_kernel(Tensor self, +void cpu_scatter_assign_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template -void cpu_scatter_add_kernel(Tensor self, +void cpu_scatter_add_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template -void cpu_scatter_mul_kernel(Tensor self, +void cpu_scatter_mul_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template 
-void cpu_scatter_input_grad_kernel(Tensor self, +void cpu_scatter_input_grad_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx); template -void gpu_gather_kernel(Tensor self, +void gpu_gather_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx); template -void gpu_scatter_assign_kernel(Tensor self, +void gpu_scatter_assign_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template -void gpu_scatter_add_kernel(Tensor self, +void gpu_scatter_add_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template -void gpu_scatter_mul_kernel(Tensor self, +void gpu_scatter_mul_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor src, + phi::DenseTensor src, const platform::DeviceContext& ctx); template -void gpu_scatter_input_grad_kernel(Tensor self, +void gpu_scatter_input_grad_kernel(phi::DenseTensor self, int dim, const phi::DenseTensor& index, - Tensor result, + phi::DenseTensor result, const platform::DeviceContext& ctx); } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index ee095c598bc1b..0f81d7fec3184 100644 --- a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class CPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/gaussian_random_op_mlu.cc b/paddle/fluid/operators/gaussian_random_op_mlu.cc index a70ddc428d840..5128cc9502581 100644 --- a/paddle/fluid/operators/gaussian_random_op_mlu.cc +++ b/paddle/fluid/operators/gaussian_random_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class MLUGaussianRandomKernel : public framework::OpKernel { public: @@ -30,7 +29,7 @@ class MLUGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->type()); + phi::DenseTensor cpu_tensor(tensor->type()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::normal_distribution dist(mean, std); diff --git a/paddle/fluid/operators/gaussian_random_op_npu.cc b/paddle/fluid/operators/gaussian_random_op_npu.cc index 3523eb7379399..5e3fa3dbef5e6 100644 --- a/paddle/fluid/operators/gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/gaussian_random_op_npu.cc @@ -22,7 +22,6 @@ limitations under the License. 
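After the header edit every gather/scatter entry point names phi::DenseTensor directly in its signature. One representative declaration, with the two template parameters assumed from the Instantiate_Template_Function macros rather than quoted verbatim:

    template <typename tensor_t, typename index_t>
    void cpu_gather_kernel(phi::DenseTensor self,
                           int dim,
                           const phi::DenseTensor &index,
                           phi::DenseTensor result,
                           const platform::DeviceContext &ctx);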
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class NPUGaussianRandomKernel : public framework::OpKernel { public: @@ -32,7 +31,7 @@ class NPUGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->dtype()); + phi::DenseTensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::normal_distribution dist(mean, std); diff --git a/paddle/fluid/operators/gelu_op_npu.cc b/paddle/fluid/operators/gelu_op_npu.cc index f462336b412a3..7f6d5be9d0c73 100644 --- a/paddle/fluid/operators/gelu_op_npu.cc +++ b/paddle/fluid/operators/gelu_op_npu.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class GeluNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cu b/paddle/fluid/operators/graph_khop_sampler_op.cu index 2e703282bf932..39767b5e20a87 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.cu +++ b/paddle/fluid/operators/graph_khop_sampler_op.cu @@ -49,8 +49,6 @@ constexpr int WARP_SIZE = 32; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct MaxFunctor { T cap; diff --git a/paddle/fluid/operators/graph_khop_sampler_op.h b/paddle/fluid/operators/graph_khop_sampler_op.h index 278bbd5efd723..f5ec87f23c88b 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.h +++ b/paddle/fluid/operators/graph_khop_sampler_op.h @@ -28,8 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void SampleUniqueNeighbors(bidiiter begin, bidiiter end, int num_samples) { int left_num = std::distance(begin, end); diff --git a/paddle/fluid/operators/grid_sampler_op_mlu.cc b/paddle/fluid/operators/grid_sampler_op_mlu.cc index f71969d8b551c..07aa025a9a26c 100644 --- a/paddle/fluid/operators/grid_sampler_op_mlu.cc +++ b/paddle/fluid/operators/grid_sampler_op_mlu.cc @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class GridSamplerMLUKernel : public framework::OpKernel { public: @@ -60,13 +58,13 @@ class GridSamplerMLUKernel : public framework::OpKernel { platform::errors::Unavailable( "Only support zeros padding_mode in mlu grid_sample kernel.")); - Tensor trans_input(input->dtype()); + phi::DenseTensor trans_input(input->dtype()); // transpose input from NCHW to NHWC const std::vector perm_to_nhwc = {0, 2, 3, 1}; TransposeFromMLUTensor( ctx, perm_to_nhwc, input, &trans_input, true /*need_reshape_or_alloc*/); - Tensor tmp_output(output->dtype()); + phi::DenseTensor tmp_output(output->dtype()); tmp_output.mutable_data({n, out_h, out_w, c}, ctx.GetPlace()); MLUCnnlGridSampleDesc grid_sample_desc(mode, padding_mode, align_corners); diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index 3d6566d62b2a7..7331c792ea568 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -28,7 +28,6 @@ limitations under the License. 
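The gaussian_random NPU/MLU hunks keep the host-staging pattern with the spelled-out type. A minimal sketch, with the <T> template arguments assumed and `tensor`, `mean`, and `std` taken from the surrounding kernel:

    // Draw the samples on the host, then copy the buffer to the device tensor.
    phi::DenseTensor cpu_tensor(tensor->dtype());
    cpu_tensor.Resize(tensor->dims());
    T *cpu_data = cpu_tensor.mutable_data<T>(platform::CPUPlace());
    std::normal_distribution<T> dist(mean, std);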
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class GroupNormOp : public framework::OperatorWithKernel { @@ -123,16 +122,16 @@ class GroupNormGradOp : public framework::OperatorWithKernel { var, platform::errors::InvalidArgument( "Input(Y@GRAD) of GroupNormGradOp should not be null")); - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } - PADDLE_ENFORCE_NOT_NULL( - t, - platform::errors::InvalidArgument( - "Input(Y@GRAD) Tensor of GroupNormGradOp should not be null")); + PADDLE_ENFORCE_NOT_NULL(t, + platform::errors::InvalidArgument( + "Input(Y@GRAD) phi::DenseTensor of " + "GroupNormGradOp should not be null")); return framework::OpKernelType(framework::TransToProtoVarType(t->dtype()), ctx.GetPlace()); } diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index 6b2ba1670a3b7..9cb4e54ac0054 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -291,7 +291,7 @@ class GroupNormKernel : public framework::OpKernel { var->mutable_data(ctx.GetPlace()); phi::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); - Tensor temp_var; + phi::DenseTensor temp_var; temp_var.mutable_data(var->dims(), ctx.GetPlace()); auto* x_data = x->data(); auto* y_data = y->data(); @@ -642,7 +642,7 @@ class GroupNormGradKernel : public framework::OpKernel { phi::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); - Tensor ds, db; + phi::DenseTensor ds, db; ds.mutable_data({x_dims[0], C}, ctx.GetPlace()); db.mutable_data({x_dims[0], C}, ctx.GetPlace()); T* ds_data = ds.data(); @@ -728,7 +728,7 @@ class GroupNormGradKernel : public framework::OpKernel { // p1 = scale * var_inv // p2 = (db * scale * mean - ds * scale) * pow(var_inv, 3) * (1/n) // p3 = -p2 * mean[ng] - db * scale * var_inv * (1/n); - Tensor p1, p2, p3; + phi::DenseTensor p1, p2, p3; p1.mutable_data({x_dims[0] * C}, ctx.GetPlace()); p2.mutable_data({x_dims[0], groups}, ctx.GetPlace()); p3.mutable_data({x_dims[0], groups}, ctx.GetPlace()); @@ -770,12 +770,12 @@ class GroupNormGradKernel : public framework::OpKernel { set_zero(dev_ctx, d_bias, static_cast(0)); } - Tensor temp_var; + phi::DenseTensor temp_var; temp_var.mutable_data(var->dims(), ctx.GetPlace()); set_zero(dev_ctx, &temp_var, static_cast(0)); T* temp_var_data = temp_var.data(); - Tensor temp_mean; + phi::DenseTensor temp_mean; temp_mean.mutable_data(var->dims(), ctx.GetPlace()); set_zero(dev_ctx, &temp_mean, static_cast(0)); T* temp_mean_data = temp_mean.data(); diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 0ce89b4625a13..95cdeefc783f4 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -28,7 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template diff --git a/paddle/fluid/operators/group_norm_op_npu.cc b/paddle/fluid/operators/group_norm_op_npu.cc index 5fded4cffc713..2c0dec9dd4d0b 100644 --- a/paddle/fluid/operators/group_norm_op_npu.cc +++ b/paddle/fluid/operators/group_norm_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template struct GroupNormFunction { public: @@ -103,14 +101,14 @@ struct GroupNormFunction { const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - Tensor ReduceMeanToNG(const phi::DenseTensor* x, - const DataLayout& data_layout, - const int64_t N, - const int64_t C, - const int64_t H, - const int64_t W, - const int G) { - Tensor y(x->type()); + phi::DenseTensor ReduceMeanToNG(const phi::DenseTensor* x, + const DataLayout& data_layout, + const int64_t N, + const int64_t C, + const int64_t H, + const int64_t W, + const int G) { + phi::DenseTensor y(x->type()); // y.mutable_data( {N,G,1}, place ); if (data_layout == DataLayout::kNCHW) { y.mutable_data({N, G, 1}, place); @@ -119,7 +117,7 @@ struct GroupNormFunction { } else { y.mutable_data({N, 1, G}, place); // shape of x is [N, C*H*W/G, G] - Tensor x_trans(x->type()); + phi::DenseTensor x_trans(x->type()); x_trans.mutable_data({N, G, C * H * W / G}, place); this->Transpose(x, &x_trans, std::vector{0, 2, 1}); this->ReduceMean(&x_trans, &y, std::vector{2}); @@ -150,7 +148,7 @@ class GroupNormNPUKernel : public framework::OpKernel { const auto groups = ctx.Attr("groups"); auto place = ctx.GetPlace(); - Tensor xnorm(x->type()); + phi::DenseTensor xnorm(x->type()); xnorm.mutable_data(x->dims(), place); GroupNormFunction F(ctx); if (data_layout != DataLayout::kNCHW) { @@ -173,12 +171,12 @@ class GroupNormNPUKernel : public framework::OpKernel { F.ReduceMean(&xnorm, mean, axis); F.Sub(&xnorm, mean, &xnorm); - Tensor sqr(x->type()); + phi::DenseTensor sqr(x->type()); sqr.mutable_data(xnorm.dims(), place); F.Mul(&xnorm, &xnorm, &sqr); F.ReduceMean(&sqr, var, axis); - Tensor std(x->type()); + phi::DenseTensor std(x->type()); std.mutable_data(var->dims(), place); F.Adds(var, epsilon, &std); F.Sqrt(&std, &std); @@ -186,13 +184,13 @@ class GroupNormNPUKernel : public framework::OpKernel { F.Div(&xnorm, &std, y); y->Resize({N, C, H, W}); if (scale) { - Tensor scale_t(scale->type()); + phi::DenseTensor scale_t(scale->type()); scale_t.ShareDataWith(*scale); scale_t.Resize({C, 1, 1}); F.Mul(y, &scale_t, y); } if (bias) { - Tensor bias_t(bias->type()); + phi::DenseTensor bias_t(bias->type()); bias_t.ShareDataWith(*bias); bias_t.Resize({C, 1, 1}); F.Add(y, &bias_t, y); @@ -231,11 +229,11 @@ class GroupNormGradNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); auto _type = y->type(); - Tensor xnorm(_type); + phi::DenseTensor xnorm(_type); xnorm.mutable_data(y->dims(), place); - Tensor scale_share(_type); + phi::DenseTensor scale_share(_type); scale_share.ShareDataWith(*scale); - Tensor bias_share(_type); + phi::DenseTensor bias_share(_type); bias_share.ShareDataWith(*bias); int64_t N = y->dims()[0]; @@ -267,7 +265,7 @@ class GroupNormGradNPUKernel : public framework::OpKernel { } if (d_scale) { d_scale->mutable_data(place); - Tensor dy_xnorm(_type); + phi::DenseTensor dy_xnorm(_type); dy_xnorm.mutable_data(d_y->dims(), place); F.Mul(d_y, &xnorm, &dy_xnorm); if (data_layout == DataLayout::kNCHW) { @@ -278,12 +276,12 @@ class GroupNormGradNPUKernel : public framework::OpKernel { } // std = Sqrt(var+epsilon), init shape = [ N, G ] - Tensor std(_type); + phi::DenseTensor std(_type); std.mutable_data(var->dims(), place); F.Adds(var, epsilon, &std); F.Sqrt(&std, &std); // d_xnorm_std = dy_proc * scale / std - Tensor d_xnorm_std(_type); + phi::DenseTensor d_xnorm_std(_type); d_xnorm_std.mutable_data(y->dims(), place); 
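The GroupNorm NPU hunks keep the zero-copy broadcast trick for scale and bias; a short sketch of that step, with `F`, `y`, `scale`, and `C` coming from the surrounding kernel:

    if (scale) {
      // View the 1-D scale as [C, 1, 1] so it broadcasts over [N, C, H, W]
      // without copying the underlying buffer.
      phi::DenseTensor scale_t(scale->type());
      scale_t.ShareDataWith(*scale);
      scale_t.Resize({C, 1, 1});
      F.Mul(y, &scale_t, y);
    }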
F.Mul(d_y, &scale_share, &d_xnorm_std); if (data_layout == DataLayout::kNCHW) { @@ -303,10 +301,11 @@ class GroupNormGradNPUKernel : public framework::OpKernel { d_x->mutable_data(place); d_x->Resize(xnorm.dims()); F.Mul(&d_xnorm_std, &xnorm, d_x); - Tensor dx1 = F.ReduceMeanToNG(d_x, data_layout, N, C, H, W, G); + phi::DenseTensor dx1 = F.ReduceMeanToNG(d_x, data_layout, N, C, H, W, G); F.Mul(&dx1, &xnorm, d_x); - Tensor dx2 = F.ReduceMeanToNG(&d_xnorm_std, data_layout, N, C, H, W, G); + phi::DenseTensor dx2 = + F.ReduceMeanToNG(&d_xnorm_std, data_layout, N, C, H, W, G); F.Sub(&d_xnorm_std, d_x, d_x); F.Sub(d_x, &dx2, d_x); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index cceecdcad5fd2..1c10692d15fad 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -370,7 +370,7 @@ class GRUCPUKernel : public framework::OpKernel { gru_value.gate_weight = const_cast(weight_data); gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); - Tensor ordered_h0; + phi::DenseTensor ordered_h0; framework::Vector order(batch_gate->lod()[2]); @@ -440,10 +440,10 @@ class GRUCPUKernel : public framework::OpKernel { int bend = static_cast(batch_starts[n + 1]); int cur_batch_size = bend - bstart; - Tensor gate_t = batch_gate->Slice(bstart, bend); - Tensor reset_hidden_prev_t = + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); - Tensor hidden_t = batch_hidden->Slice(bstart, bend); + phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend); gru_value.output_value = hidden_t.data(); gru_value.gate_value = gate_t.data(); gru_value.reset_output_value = reset_hidden_prev_t.data(); @@ -505,10 +505,10 @@ class GRUCPUKernel : public framework::OpKernel { int bend = static_cast(batch_starts[n + 1]); int cur_batch_size = bend - bstart; - Tensor gate_t = batch_gate->Slice(bstart, bend); - Tensor reset_hidden_prev_t = + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); - Tensor hidden_t = batch_hidden->Slice(bstart, bend); + phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend); gru_value.output_value = hidden_t.data(); gru_value.gate_value = gate_t.data(); gru_value.reset_output_value = reset_hidden_prev_t.data(); diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index a6b57bd88f77d..53006c55f6b98 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -73,7 +73,7 @@ class GRUKernel : public framework::OpKernel { gru_value.gate_weight = const_cast(weight_data); gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); - Tensor ordered_h0; + phi::DenseTensor ordered_h0; framework::Vector order(batch_gate->lod()[2]); @@ -102,9 +102,10 @@ class GRUKernel : public framework::OpKernel { int bend = static_cast(batch_starts[n + 1]); int cur_batch_size = bend - bstart; - Tensor gate_t = batch_gate->Slice(bstart, bend); - Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); - Tensor hidden_t = batch_hidden->Slice(bstart, bend); + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor reset_hidden_prev_t = + batch_reset_hidden_prev->Slice(bstart, bend); + phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend); gru_value.output_value = hidden_t.data(); gru_value.gate_value = gate_t.data(); 
gru_value.reset_output_value = reset_hidden_prev_t.data(); diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 89731e2efa022..286bf9fe2732d 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -25,8 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template inline void ReorderInitState(const DeviceContext& ctx, const phi::DenseTensor& src, @@ -79,7 +77,7 @@ class GRUGradKernel : public framework::OpKernel { zero(dev_ctx, &batch_gate_grad, static_cast(0.0)); zero(dev_ctx, &batch_reset_hidden_prev_grad, static_cast(0.0)); - Tensor ordered_h0, ordered_h0_grad; + phi::DenseTensor ordered_h0, ordered_h0_grad; framework::Vector order(batch_gate->lod()[2]); @@ -126,16 +124,17 @@ class GRUGradKernel : public framework::OpKernel { int bend = static_cast(batch_starts[n + 1]); int cur_batch_size = bend - bstart; - Tensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); gru_value.gate_value = gate_t.data(); - Tensor reset_hidden_prev_t = batch_reset_hidden_prev->Slice(bstart, bend); + phi::DenseTensor reset_hidden_prev_t = + batch_reset_hidden_prev->Slice(bstart, bend); gru_value.reset_output_value = reset_hidden_prev_t.data(); - Tensor hidden_grad_t = batch_hidden_grad.Slice(bstart, bend); + phi::DenseTensor hidden_grad_t = batch_hidden_grad.Slice(bstart, bend); gru_grad.output_grad = hidden_grad_t.data(); - Tensor gate_grad_t = batch_gate_grad.Slice(bstart, bend); + phi::DenseTensor gate_grad_t = batch_gate_grad.Slice(bstart, bend); gru_grad.gate_grad = gate_grad_t.data(); - Tensor reset_hidden_prev_grad_t = + phi::DenseTensor reset_hidden_prev_grad_t = batch_reset_hidden_prev_grad.Slice(bstart, bend); gru_grad.reset_output_grad = reset_hidden_prev_grad_t.data(); if (n == 0) { @@ -144,9 +143,11 @@ class GRUGradKernel : public framework::OpKernel { h0 && h0_grad ? ordered_h0_grad.data() : nullptr; } else { int bstart_pre = static_cast(batch_starts[n - 1]); - Tensor hidden_prev_t = batch_hidden->Slice(bstart_pre, bstart); + phi::DenseTensor hidden_prev_t = + batch_hidden->Slice(bstart_pre, bstart); gru_value.prev_out_value = hidden_prev_t.data(); - Tensor hidden_prev_grad_t = batch_hidden_grad.Slice(bstart_pre, bstart); + phi::DenseTensor hidden_prev_grad_t = + batch_hidden_grad.Slice(bstart_pre, bstart); gru_grad.prev_out_grad = hidden_prev_grad_t.data(); } gru_value.output_value = nullptr; diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 3ed3179a63e63..d46e6cf429f6f 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -23,8 +23,6 @@ limitations under the License. 
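The GRU hunks repeatedly slice the batched tensors per time step; a compact sketch of one step, with the <T> on data() assumed and the loop bounds and gru_value taken from the surrounding code:

    // Work on the rows belonging to the current time step only.
    phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend);
    phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend);
    gru_value.gate_value = gate_t.data<T>();
    gru_value.output_value = hidden_t.data<T>();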
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; template @@ -192,8 +190,8 @@ class GRUUnitGradKernel : public framework::OpKernel { context.Output(framework::GradVarName("Weight")); auto* bias_grad = context.Output(framework::GradVarName("Bias")); - Tensor gate_grad; - Tensor reset_hidden_prev_grad; + phi::DenseTensor gate_grad; + phi::DenseTensor reset_hidden_prev_grad; const T* hidden_prev_data = hidden_prev->data(); const T* weight_data = weight->data(); diff --git a/paddle/fluid/operators/huber_loss_op_mlu.cc b/paddle/fluid/operators/huber_loss_op_mlu.cc index 4387037ad01af..4dc542b675f54 100644 --- a/paddle/fluid/operators/huber_loss_op_mlu.cc +++ b/paddle/fluid/operators/huber_loss_op_mlu.cc @@ -18,17 +18,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class HuberLossMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* residual = ctx.Output("Residual"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* residual = ctx.Output("Residual"); + auto* out = ctx.Output("Out"); auto delta = ctx.Attr("delta"); auto place = ctx.GetPlace(); @@ -65,7 +63,7 @@ class HuberLossMLUKernel : public framework::OpKernel { GetBasePtr(out)); // compute multiply by delta - Tensor scale_tensor, bias_tensor; + phi::DenseTensor scale_tensor, bias_tensor; scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); FillMLUTensorWithHostValue(ctx, static_cast(delta), &scale_tensor); @@ -93,20 +91,20 @@ class HuberLossGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* residual = ctx.Input("Residual"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* residual = ctx.Input("Residual"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); auto delta = ctx.Attr("delta"); auto place = ctx.GetPlace(); - Tensor t_grad_rd; + phi::DenseTensor t_grad_rd; t_grad_rd = ctx.AllocateTmpTensor(residual->dims(), dev_ctx); MLUCnnlTensorDesc t_grad_rd_desc(t_grad_rd); if (dx || dy) { - Tensor t_zero; + phi::DenseTensor t_zero; t_zero = ctx.AllocateTmpTensor(residual->dims(), dev_ctx); FillMLUTensorWithHostValue(ctx, static_cast(0.f), &t_zero); @@ -130,7 +128,7 @@ class HuberLossGradMLUKernel : public framework::OpKernel { GetBasePtr(&t_grad_rd)); } // compute multiply by delta - Tensor scale_tensor, bias_tensor; + phi::DenseTensor scale_tensor, bias_tensor; scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); diff --git a/paddle/fluid/operators/huber_loss_op_npu.cc b/paddle/fluid/operators/huber_loss_op_npu.cc index a7be6feb628bf..78529df55aa94 100644 --- a/paddle/fluid/operators/huber_loss_op_npu.cc +++ b/paddle/fluid/operators/huber_loss_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. 
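The Huber-loss MLU hunks allocate their scalar helpers through the context; a sketch of that call with the template arguments written out as assumptions (element type T and the MLU device context):

    // One-element tensor holding delta, used to scale the residual.
    phi::DenseTensor scale_tensor =
        ctx.AllocateTmpTensor<T, MLUDeviceContext>({1}, dev_ctx);
    FillMLUTensorWithHostValue(ctx, static_cast<T>(delta), &scale_tensor);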
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void HuberLossSub(const platform::Place& place, const aclrtStream& stream, @@ -117,9 +115,9 @@ class HuberLossGradNPUKernel : public framework::OpKernel { .stream(); auto place = ctx.GetPlace(); - Tensor t_grad_rd; + phi::DenseTensor t_grad_rd; if (dx || dy) { - Tensor t_zero; + phi::DenseTensor t_zero; HuberLossZerosLike(place, stream, residual, &t_zero); HuberLossSmoothL1LossGrad( place, stream, residual, &t_zero, dout, delta, &t_grad_rd); diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index a9da8f8f4dbbc..523639faddcbe 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -26,8 +26,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - inline int Im2SeqOutputSize( int input_size, int filter_size, int padding_0, int padding_1, int stride) { const int output_size = @@ -52,7 +50,7 @@ class Im2SequenceKernel : public framework::OpKernel { if (ctx.HasInput("Y") && batch_size > 1) { const phi::DenseTensor* imgrealsize = ctx.Input("Y"); auto out_stride = ctx.Attr>("out_stride"); - Tensor cpu_shape_tensor; + phi::DenseTensor cpu_shape_tensor; paddle::framework::TensorCopySync( *imgrealsize, platform::CPUPlace(), &cpu_shape_tensor); std::vector imgreal_h; @@ -89,15 +87,16 @@ class Im2SequenceKernel : public framework::OpKernel { const std::vector dilations({1, 1}); int offset_out = 0; for (int i = 0; i < batch_size; i++) { - const Tensor src = + const phi::DenseTensor src = in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); - Tensor dst = out->Slice(offset_out, - offset_out + output_height[i] * output_width[i]) - .Resize({output_height[i], - output_width[i], - img_channels, - kernels[0], - kernels[1]}); + phi::DenseTensor dst = + out->Slice(offset_out, + offset_out + output_height[i] * output_width[i]) + .Resize({output_height[i], + output_width[i], + img_channels, + kernels[0], + kernels[1]}); offset_out += output_height[i] * output_width[i]; phi::funcs::Im2ColFunctor @@ -127,13 +126,13 @@ class Im2SequenceKernel : public framework::OpKernel { auto out_dims = out->dims(); out->Resize({batch_size, out->numel() / batch_size}); for (int i = 0; i < batch_size; i++) { - const Tensor src = + const phi::DenseTensor src = in->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); - Tensor dst = out->Slice(i, i + 1).Resize({output_height, - output_width, - img_channels, - kernels[0], - kernels[1]}); + phi::DenseTensor dst = out->Slice(i, i + 1).Resize({output_height, + output_width, + img_channels, + kernels[0], + kernels[1]}); phi::funcs::Im2ColFunctor f; @@ -187,9 +186,9 @@ class Im2SequenceGradKernel : public framework::OpKernel { auto d_out_dims = d_out->dims(); d_out->Resize({batch_size, d_out->numel() / batch_size}); for (int i = 0; i < batch_size; i++) { - Tensor dst = + phi::DenseTensor dst = d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); - const Tensor src = d_out->Slice(i, i + 1).Resize( + const phi::DenseTensor src = d_out->Slice(i, i + 1).Resize( {output_height, output_width, img_channels, kernels[0], kernels[1]}); phi::funcs::Col2ImFunctor f; diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc index 425590ebeeb52..0e7f1fea1bd81 100644 --- a/paddle/fluid/operators/index_sample_op_npu.cc +++ b/paddle/fluid/operators/index_sample_op_npu.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template void IndexSampleGather(const paddle::platform::NPUDeviceContext& dev_ctx, @@ -38,7 +37,7 @@ void IndexSampleGather(const paddle::platform::NPUDeviceContext& dev_ctx, gather_index_vec.push_back(index_vec[i * index_length + j]); } } - Tensor gather_index; + phi::DenseTensor gather_index; framework::TensorFromVector(gather_index_vec, dev_ctx, &gather_index); gather_index.Resize({batch_size, index_length, 2}); @@ -89,7 +88,7 @@ void IndexSampleGradScatter(const paddle::platform::NPUDeviceContext& dev_ctx, scatter_index_vec.push_back(index_vec[i * index_length + j]); } } - Tensor scatter_index; + phi::DenseTensor scatter_index; framework::TensorFromVector(scatter_index_vec, dev_ctx, &scatter_index); scatter_index.Resize({batch_size, index_length, 2}); diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index a705a95156608..6bb91f325f953 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -22,7 +22,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; template diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index 0f18f9793d305..327471b216f0b 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class IndexSelectNPUKernel : public framework::OpKernel { public: @@ -66,7 +64,7 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { dim += out_dims.size(); } - Tensor casted_index; + phi::DenseTensor casted_index; if (framework::TransToProtoVarType(index->dtype()) != framework::proto::VarType::INT32) { casted_index.mutable_data(index->dims(), ctx.GetPlace()); @@ -90,7 +88,7 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { .AddOutput(*x_grad); runner.Run(stream); } else { - Tensor transed_out_grad; + phi::DenseTensor transed_out_grad; std::vector in_trans_perm; in_trans_perm.push_back(dim); for (int i = 0; i < out_dims.size(); ++i) { @@ -109,7 +107,7 @@ class IndexSelectGradNPUKernel : public framework::OpKernel { .AddOutput(transed_out_grad); in_trans_runner.Run(stream); - Tensor sum_out; + phi::DenseTensor sum_out; framework::DDim sum_dims(x_dims); sum_dims[0] = x_dims[dim]; auto idx = 1; diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 53453c6cad184..a80324d5d303a 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -145,8 +145,8 @@ class InplaceABNGradOp : public paddle::operators::BatchNormGradOp { "can't find gradient variable of Y")); } const phi::DenseTensor* t = nullptr; - if (var->IsType()) { - t = &var->Get(); + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } @@ -323,9 +323,9 @@ class InplaceABNGradKernel : public framework::OpKernel { auto* mean = ctx.Input("ReserveSpace"); auto* variance = ctx.Input("ReserveSpace"); - paddle::optional space_opt; - paddle::optional mean_opt; - paddle::optional variance_opt; + paddle::optional space_opt; + paddle::optional mean_opt; + paddle::optional variance_opt; if (reserve_space != nullptr) { space_opt = *reserve_space; diff --git a/paddle/fluid/operators/inplace_abn_op.cu 
b/paddle/fluid/operators/inplace_abn_op.cu index e1131822f289e..bec88e5dfd2a7 100644 --- a/paddle/fluid/operators/inplace_abn_op.cu +++ b/paddle/fluid/operators/inplace_abn_op.cu @@ -171,9 +171,9 @@ class InplaceABNGradKernel : public framework::OpKernel { scale_grad, bias_grad); } else { - paddle::optional space_opt; - paddle::optional mean_opt; - paddle::optional variance_opt; + paddle::optional space_opt; + paddle::optional mean_opt; + paddle::optional variance_opt; if (reserve_space != nullptr) { space_opt = *reserve_space; diff --git a/paddle/fluid/operators/inplace_abn_op.h b/paddle/fluid/operators/inplace_abn_op.h index 2a9568e845492..29253662d4deb 100644 --- a/paddle/fluid/operators/inplace_abn_op.h +++ b/paddle/fluid/operators/inplace_abn_op.h @@ -22,7 +22,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index ed474193461c3..c9f33799c9e10 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -105,9 +105,9 @@ framework::OpKernelType InstanceNormGradOp::GetExpectedKernelType( PADDLE_THROW( platform::errors::NotFound("cannot find gradient variable of Y")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } @@ -126,9 +126,9 @@ framework::OpKernelType InstanceNormDoubleGradOp::GetExpectedKernelType( PADDLE_THROW( platform::errors::NotFound("cannot find gradient variable of Y")); } - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } diff --git a/paddle/fluid/operators/instance_norm_op.h b/paddle/fluid/operators/instance_norm_op.h index 2101f6a12bb53..05e2bde973924 100644 --- a/paddle/fluid/operators/instance_norm_op.h +++ b/paddle/fluid/operators/instance_norm_op.h @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class InstanceNormOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/instance_norm_op_npu.cc b/paddle/fluid/operators/instance_norm_op_npu.cc index f46c3a806a2c0..f11719bea9c7c 100644 --- a/paddle/fluid/operators/instance_norm_op_npu.cc +++ b/paddle/fluid/operators/instance_norm_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. 
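In the inplace_abn hunks the reserve-space and statistics wrappers become optionals of the concrete type; a short sketch of the pattern as it appears around these lines, with `reserve_space` taken from the surrounding kernel:

    // Optional inputs forwarded to the phi batch-norm grad functor.
    paddle::optional<phi::DenseTensor> space_opt;
    paddle::optional<phi::DenseTensor> mean_opt;
    paddle::optional<phi::DenseTensor> variance_opt;
    if (reserve_space != nullptr) {
      space_opt = *reserve_space;
    }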
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class InstanceNormNPUKernel : public framework::OpKernel { @@ -56,7 +55,7 @@ class InstanceNormNPUKernel : public framework::OpKernel { } } - Tensor tmp_x, tmp_y; + phi::DenseTensor tmp_x, tmp_y; tmp_x.ShareDataWith(*x); tmp_x.Resize(phi::make_ddim(tmp_x_dims)); diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index a589b49500e0a..a0e1410f52d3d 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -945,7 +945,7 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, } auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_w = size_data[0]; @@ -1040,7 +1040,7 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, } auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_h = size_data[0]; @@ -1195,7 +1195,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, } auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_d = size_data[0]; @@ -1288,7 +1288,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, template static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, phi::DenseTensor* input_grad, - const Tensor output_grad) { + const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); @@ -1314,7 +1314,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_w = size_data[0]; @@ -1379,7 +1379,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, template static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, phi::DenseTensor* input_grad, - const Tensor output_grad) { + const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); @@ -1407,7 +1407,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_h = size_data[0]; @@ -1555,7 +1555,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { - Tensor sizes; + phi::DenseTensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_d = size_data[0]; diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 11c1429107654..ad67efc4b78d5 100644 --- 
a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -26,7 +26,6 @@ template using EigenTensor = phi::EigenTensor; -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; inline std::vector get_new_shape( @@ -1344,7 +1343,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, template static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, phi::DenseTensor* input_grad, - const Tensor output_grad) { + const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); diff --git a/paddle/fluid/operators/interpolate_op_npu.cc b/paddle/fluid/operators/interpolate_op_npu.cc index a059d5b522ee3..36ffd1ae53ed6 100644 --- a/paddle/fluid/operators/interpolate_op_npu.cc +++ b/paddle/fluid/operators/interpolate_op_npu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; inline static void CheckArgument(const framework::ExecutionContext& ctx) { @@ -136,7 +135,7 @@ class InterpolateNPUKernel : public framework::OpKernel { CalcOutSize(ctx, h, w, &out_h, &out_w); // the 'input' tensor may has no set (or wrong set) of the layout - Tensor input_x(input->type()); + phi::DenseTensor input_x(input->type()); input_x.ShareDataWith(*input); input_x.set_layout(data_layout); @@ -188,7 +187,7 @@ class InterpolateGradNPUKernel : public framework::OpKernel { // the 'output_grad' tensor may has no set (or wrong set) of the layout auto* output_grad = ctx.Input(framework::GradVarName("Out")); - Tensor output_grad_tmp(output_grad->type()); + phi::DenseTensor output_grad_tmp(output_grad->type()); output_grad_tmp.ShareDataWith(*output_grad); output_grad_tmp.set_layout(data_layout); diff --git a/paddle/fluid/operators/interpolate_v2_op_mlu.cc b/paddle/fluid/operators/interpolate_v2_op_mlu.cc index 833d650d6a131..e6f34539b1c01 100644 --- a/paddle/fluid/operators/interpolate_v2_op_mlu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_mlu.cc @@ -175,7 +175,7 @@ class InterpolateV2MLUKernel : public framework::OpKernel { // cnnlInterp_v2 only accepts NHWC when mode is CNNL_INTERP_BILINEAR and // CNNL_INTERP_NEAREST, framework::DDim dim_in, dim_in_trans, dim_out, dim_out_trans; - Tensor transformed_input, transformed_output; + phi::DenseTensor transformed_input, transformed_output; bool need_transpose = input_dims.size() != 2; if (input_dims.size() == 4) { // need to do transpose if layout is kNCHW @@ -439,7 +439,7 @@ class InterpolateV2GradMLUKernel : public framework::OpKernel { framework::DDim dim_grad; framework::DDim dim_out_grad, dim_out_trans_grad, dim_in_grad, dim_in_trans_grad; - Tensor transformed_output_grad, transformed_input_grad; + phi::DenseTensor transformed_output_grad, transformed_input_grad; bool need_transpose = input_dims.size() != 2 && data_layout == DataLayout::kNCHW; diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc index 69d2f563e37bc..31f08badd128c 100644 --- a/paddle/fluid/operators/interpolate_v2_op_npu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; using DDim = framework::DDim; using fp16 = paddle::platform::float16; @@ -104,7 +103,7 @@ struct InterpolateFunction { auto yt = y_new_shape[axis]; y_new_shape[axis] = y_new_shape[0]; y_new_shape[0] = yt; - Tensor gy_t; + phi::DenseTensor gy_t; gy_t.mutable_data(y_new_shape, place); Transpose(gy, &gy_t, axis_swap); // 2 scatter @@ -112,7 +111,7 @@ struct InterpolateFunction { auto xt = x_new_shape[axis]; x_new_shape[axis] = x_new_shape[0]; x_new_shape[0] = xt; - Tensor gx_zero, gx_t; + phi::DenseTensor gx_zero, gx_t; gx_zero.mutable_data(x_new_shape, place); gx_t.mutable_data(x_new_shape, place); FillNpuTensorWithConstant(&gx_zero, static_cast(0)); @@ -161,14 +160,14 @@ struct InterpolateFunction { platform::Place place; aclrtStream stream; const framework::ExecutionContext& ctx; - Tensor t0; - Tensor t1; - Tensor tn; + phi::DenseTensor t0; + phi::DenseTensor t1; + phi::DenseTensor tn; }; template <> void InterpolateFunction::Arange(int n, phi::DenseTensor* x) { - Tensor x_fp32(experimental::DataType::FLOAT32); + phi::DenseTensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {}); @@ -238,7 +237,7 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, phi::DenseTensor* coef_w1) { InterpolateFunction F(ctx); auto place = ctx.GetPlace(); - Tensor _h0, _w0; + phi::DenseTensor _h0, _w0; _h0.mutable_data({out_h}, place); _w0.mutable_data({out_w}, place); F.Arange(out_h, &_h0); @@ -255,8 +254,8 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, F.Muls(&_w0, ratio_w, &_w0); } - Tensor zero_t; - Tensor one_t; + phi::DenseTensor zero_t; + phi::DenseTensor one_t; zero_t.mutable_data({1}, place); one_t.mutable_data({1}, place); FillNpuTensorWithConstant(&zero_t, static_cast(0)); @@ -264,7 +263,7 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, F.Maximum(&_h0, &zero_t, &_h0); F.Maximum(&_w0, &zero_t, &_w0); - Tensor _h0_floor, _w0_floor; + phi::DenseTensor _h0_floor, _w0_floor; _h0_floor.mutable_data({out_h}, place); _w0_floor.mutable_data({out_w}, place); F.Floor(&_h0, &_h0_floor); @@ -272,12 +271,12 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, F.Cast(&_h0_floor, h0); F.Cast(&_w0_floor, w0); - Tensor one_int; + phi::DenseTensor one_int; one_int.mutable_data({1}, place); FillNpuTensorWithConstant(&one_int, static_cast(1)); F.Add(h0, &one_int, h1); F.Add(w0, &one_int, w1); - Tensor t_max_h, t_max_w; + phi::DenseTensor t_max_h, t_max_w; t_max_h.mutable_data({1}, place); t_max_w.mutable_data({1}, place); FillNpuTensorWithConstant(&t_max_h, static_cast(in_h - 1)); @@ -334,12 +333,12 @@ void BilinearFwdNpu(const framework::ExecutionContext& ctx, &ratio_h, &ratio_w); - Tensor h0, h1, w0, w1; + phi::DenseTensor h0, h1, w0, w1; h0.mutable_data({out_h}, place); h1.mutable_data({out_h}, place); w0.mutable_data({out_w}, place); w1.mutable_data({out_w}, place); - Tensor coef_h0, coef_h1, coef_w0, coef_w1; + phi::DenseTensor coef_h0, coef_h1, coef_w0, coef_w1; coef_h0.mutable_data({out_h}, place); coef_h1.mutable_data({out_h}, place); coef_w0.mutable_data({out_w}, place); @@ -363,7 +362,7 @@ void BilinearFwdNpu(const framework::ExecutionContext& ctx, &coef_w0, &coef_w1); - Tensor input_gather_h0, input_gather_h1; + phi::DenseTensor input_gather_h0, 
input_gather_h1; auto dim_gather_h = indim; dim_gather_h[axis_h] = out_h; input_gather_h0.mutable_data(dim_gather_h, place); @@ -374,13 +373,13 @@ void BilinearFwdNpu(const framework::ExecutionContext& ctx, F.Mul(&input_gather_h0, &coef_h0, &input_gather_h0); F.Mul(&input_gather_h1, &coef_h1, &input_gather_h1); - Tensor out_x4; + phi::DenseTensor out_x4; out_x4.mutable_data({4, outdim[0], outdim[1], outdim[2], outdim[3]}, place); - Tensor input_gather_h0_w0 = out_x4.Slice(0, 1); - Tensor input_gather_h0_w1 = out_x4.Slice(1, 2); - Tensor input_gather_h1_w0 = out_x4.Slice(2, 3); - Tensor input_gather_h1_w1 = out_x4.Slice(3, 4); + phi::DenseTensor input_gather_h0_w0 = out_x4.Slice(0, 1); + phi::DenseTensor input_gather_h0_w1 = out_x4.Slice(1, 2); + phi::DenseTensor input_gather_h1_w0 = out_x4.Slice(2, 3); + phi::DenseTensor input_gather_h1_w1 = out_x4.Slice(3, 4); F.Gather(&input_gather_h0, &w0, axis_w, &input_gather_h0_w0); F.Gather(&input_gather_h0, &w1, axis_w, &input_gather_h0_w1); F.Gather(&input_gather_h1, &w0, axis_w, &input_gather_h1_w0); @@ -425,12 +424,12 @@ void BilinearBwdNpu(const framework::ExecutionContext& ctx, &ratio_h, &ratio_w); - Tensor h0, h1, w0, w1; + phi::DenseTensor h0, h1, w0, w1; h0.mutable_data({out_h}, place); h1.mutable_data({out_h}, place); w0.mutable_data({out_w}, place); w1.mutable_data({out_w}, place); - Tensor coef_h0, coef_h1, coef_w0, coef_w1; + phi::DenseTensor coef_h0, coef_h1, coef_w0, coef_w1; coef_h0.mutable_data({out_h}, place); coef_h1.mutable_data({out_h}, place); coef_w0.mutable_data({out_w}, place); @@ -454,7 +453,7 @@ void BilinearBwdNpu(const framework::ExecutionContext& ctx, &coef_w0, &coef_w1); - Tensor gy_w0, gy_w1; + phi::DenseTensor gy_w0, gy_w1; gy_w0.mutable_data(outdim, place); gy_w1.mutable_data(outdim, place); F.Mul(gout, &coef_w0, &gy_w0); @@ -462,7 +461,7 @@ void BilinearBwdNpu(const framework::ExecutionContext& ctx, auto dim_gather_h = indim; dim_gather_h[axis_h] = out_h; - Tensor g_gather_w0, g_gather_w1; + phi::DenseTensor g_gather_w0, g_gather_w1; g_gather_w0.mutable_data(dim_gather_h, place); g_gather_w1.mutable_data(dim_gather_h, place); w0.Resize({out_w, 1}); @@ -474,7 +473,7 @@ void BilinearBwdNpu(const framework::ExecutionContext& ctx, F.Mul(&g_gather_w0, &coef_h1, &g_gather_w1); F.Mul(&g_gather_w0, &coef_h0, &g_gather_w0); - Tensor gx_0, gx_1; + phi::DenseTensor gx_0, gx_1; gx_0.mutable_data(indim, place); gx_1.mutable_data(indim, place); h0.Resize({out_h, 1}); @@ -493,10 +492,11 @@ class InterpolateV2NPUKernel : public framework::OpKernel { auto* output = ctx.Output("Out"); auto input_dims = input->dims(); - PADDLE_ENFORCE_EQ(input_dims.size(), - 4UL, - platform::errors::External( - "NPU Interpolate Kernel only support 4-D Tensor.")); + PADDLE_ENFORCE_EQ( + input_dims.size(), + 4UL, + platform::errors::External( + "NPU Interpolate Kernel only support 4-D phi::DenseTensor.")); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); @@ -552,14 +552,16 @@ class InterpolateV2NPUKernel : public framework::OpKernel { scale_w > 0, true, platform::errors::InvalidArgument( - "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "The scale_w in input 'Scale' phi::DenseTensor of " + "Operator(interpolate) " "should be greater than 0, but received value is %d.", scale_w)); PADDLE_ENFORCE_EQ( scale_h > 0, true, platform::errors::InvalidArgument( - "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "The scale_h in input 'Scale' 
phi::DenseTensor of " + "Operator(interpolate) " "should be greater than 0, but received value is %d.", scale_h)); } else { @@ -704,14 +706,16 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel { scale_w > 0, true, platform::errors::InvalidArgument( - "The scale_w in input 'Scale' Tensor of Operator(interpolate) " + "The scale_w in input 'Scale' phi::DenseTensor of " + "Operator(interpolate) " "should be greater than 0, but received value is %d.", scale_w)); PADDLE_ENFORCE_EQ( scale_h > 0, true, platform::errors::InvalidArgument( - "The scale_h in input 'Scale' Tensor of Operator(interpolate) " + "The scale_h in input 'Scale' phi::DenseTensor of " + "Operator(interpolate) " "should be greater than 0, but received value is %d.", scale_h)); } else { diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 8070527a56a8c..2c2cab61521ef 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -135,12 +135,11 @@ void BenchAllImpls(const typename KernelTuple::attr_type& attr, Args... args) { LOG(INFO) << loginfos.str(); } -using Tensor = phi::DenseTensor; template void BenchKernelXYZN() { using T = typename KernelTuple::data_type; for (int d : TestSizes()) { - Tensor x, y, z; + phi::DenseTensor x, y, z; x.Resize({d}); y.Resize({d}); z.Resize({d}); @@ -161,7 +160,7 @@ void BenchKernelAXYN() { using T = typename KernelTuple::data_type; for (int d : TestSizes()) { const T a = static_cast(3); - Tensor x, y; + phi::DenseTensor x, y; x.Resize({d}); y.Resize({d}); T* x_data = x.mutable_data(PlaceType()); @@ -177,7 +176,7 @@ template void BenchKernelXRN() { using T = typename KernelTuple::data_type; for (int d : TestSizes()) { - Tensor x; + phi::DenseTensor x; RandomVec(d, x.mutable_data({d}, PlaceType())); T res; BenchAllImpls(d, x.data(), &res, d); @@ -188,7 +187,7 @@ template void BenchKernelXYN() { using T = typename KernelTuple::data_type; for (int d : TestSizes()) { - Tensor x, y; + phi::DenseTensor x, y; x.Resize({d}); y.Resize({d}); T* x_data = x.mutable_data(PlaceType()); @@ -205,7 +204,7 @@ void BenchKernelLSTM() { for (int d : TestSizes()) { const jit::lstm_attr_t attr( d, jit::kVSigmoid, jit::kVTanh, jit::kVTanh, use_peephole); - Tensor x, ct_1, ct, ht, wp, checked; + phi::DenseTensor x, ct_1, ct, ht, wp, checked; x.Resize({4 * d}); ct_1.Resize({d}); ct.Resize({d}); @@ -242,7 +241,7 @@ void BenchKernelGRU() { for (int d : TestSizes()) { const jit::gru_attr_t attr(d, jit::kVSigmoid, jit::kVTanh); auto place = PlaceType(); - Tensor x, ht_1, ht; + phi::DenseTensor x, ht_1, ht; x.Resize({3 * d}); ht_1.Resize({d}); ht.Resize({d}); @@ -269,7 +268,7 @@ void BenchKernelSeqPool() { jit::seq_pool_attr_t attr(w, type); for (int h : TestSizes()) { attr.h = h; - Tensor x, y; + phi::DenseTensor x, y; x.Resize({h * w}); y.Resize({w}); RandomVec(h * w, x.mutable_data(PlaceType()), -2.f, 2.f); @@ -287,7 +286,7 @@ void BenchKernelEmbSeqPool() { std::vector pool_types = {jit::SeqPoolType::kSum}; int64_t tbl_h = 1e4; for (int tbl_w : {10, 16, 256}) { - Tensor table; + phi::DenseTensor table; table.Resize({tbl_h, tbl_w}); RandomVec(tbl_h * tbl_w, table.mutable_data(PlaceType()), -2.f, 2.f); const T* table_data = table.data(); @@ -297,7 +296,7 @@ void BenchKernelEmbSeqPool() { int64_t out_w = tbl_w * idx_w; jit::emb_seq_pool_attr_t attr( tbl_h, tbl_w, idx_h, idx_w, out_w, type); - Tensor idx, out; + phi::DenseTensor idx, out; idx.Resize({idx_h, idx_w}); out.Resize({out_w}); RandomVec(idx_h * idx_w, @@ -348,12 
+347,12 @@ void BenchKernelSgd() { for (int param_h : {1, 1000}) { for (int grad_w : {1, 2, 8, 16, 30, 256}) { // only benchmark inplace - Tensor param; + phi::DenseTensor param; param.Resize({param_h, grad_w}); T* param_data = param.mutable_data(PlaceType()); RandomVec(param_h * grad_w, param_data, -2.f, 2.f); for (int rows_size = 1; rows_size <= std::min(param_h, 10); ++rows_size) { - Tensor grad; + phi::DenseTensor grad; grad.Resize({rows_size, grad_w}); std::vector rows = UnDuplicatedRandomVec(rows_size, 0, rows_size - 1); @@ -375,7 +374,7 @@ void BenchKernelMatMul() { for (int m : {1, 2, 3, 4}) { for (int n : TestSizes()) { for (int k : TestSizes()) { - Tensor a, b, c; + phi::DenseTensor a, b, c; a.Resize({m * k}); b.Resize({k * n}); c.Resize({m * n}); @@ -397,7 +396,7 @@ void BenchKernelSoftmax() { using T = typename KernelTuple::data_type; for (int bs : {1, 2, 10}) { for (int n : TestSizes()) { - Tensor x, y; + phi::DenseTensor x, y; x.Resize({bs, n}); y.Resize({bs, n}); RandomVec(bs * n, x.mutable_data(PlaceType()), -2.f, 2.f); @@ -418,7 +417,7 @@ void BenchKernelLayerNorm() { for (int x_dim_1 : TestSizes()) { int right = x_dim_1; int sz = left * right; - Tensor x, mean, var, scale, bias, out; + phi::DenseTensor x, mean, var, scale, bias, out; x.Resize({n, x_dim_0, x_dim_1}); out.Resize({n, x_dim_0, x_dim_1}); mean.Resize({n, x_dim_0}); @@ -462,7 +461,7 @@ void BenchKernelCRFDecoding() { for (int tag_num : TestSizes()) { int x_sz = seq_len * tag_num; int w_sz = (tag_num + state_trans_base_idx) * tag_num; - Tensor x, w, alpha, track; + phi::DenseTensor x, w, alpha, track; x.Resize({seq_len, tag_num}); w.Resize({tag_num + state_trans_base_idx, tag_num}); alpha.Resize({seq_len, tag_num}); @@ -486,12 +485,12 @@ template void BenchKernelVBroadcast() { using T = typename KernelTuple::data_type; for (int64_t w : {1, 16, 64, 100, 256}) { - Tensor x; + phi::DenseTensor x; x.Resize({w}); RandomVec(w, x.mutable_data(PlaceType())); const T* x_data = x.data(); for (int h : TestSizes()) { - Tensor y; + phi::DenseTensor y; y.Resize({h * w}); T* y_data = y.mutable_data(PlaceType()); BenchAllImpls( diff --git a/paddle/fluid/operators/kldiv_loss_op_npu.cc b/paddle/fluid/operators/kldiv_loss_op_npu.cc index f21e939a7b118..760675ea74663 100644 --- a/paddle/fluid/operators/kldiv_loss_op_npu.cc +++ b/paddle/fluid/operators/kldiv_loss_op_npu.cc @@ -20,8 +20,6 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class KLDivLossNPUKernel : public framework::OpKernel { public: @@ -114,7 +112,7 @@ class KLDivLossGradNPUKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); auto stream = dev_ctx.stream(); - Tensor loss_grad_transformed; + phi::DenseTensor loss_grad_transformed; if ("none" == reduction) { loss_grad_transformed.ShareDataWith(*loss_grad); } else { diff --git a/paddle/fluid/operators/label_smooth_op_mlu.cc b/paddle/fluid/operators/label_smooth_op_mlu.cc index 211ffc7fb2cd6..96f629e14df5c 100644 --- a/paddle/fluid/operators/label_smooth_op_mlu.cc +++ b/paddle/fluid/operators/label_smooth_op_mlu.cc @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class LabelSmoothMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/label_smooth_op_npu.cc b/paddle/fluid/operators/label_smooth_op_npu.cc index 529e8564cb19b..71bb1786bd018 100644 --- a/paddle/fluid/operators/label_smooth_op_npu.cc +++ b/paddle/fluid/operators/label_smooth_op_npu.cc @@ -18,8 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void LabelSmoothMuls(const platform::Place& place, const aclrtStream& stream, @@ -70,15 +68,15 @@ class LabelSmoothNPUKernel : public framework::OpKernel { .stream(); if (dist_t) { - Tensor tmp; - Tensor dist; - Tensor tmp2; + phi::DenseTensor tmp; + phi::DenseTensor dist; + phi::DenseTensor tmp2; LabelSmoothMuls(place, stream, in_t, (1 - epsilon), &tmp); LabelSmoothMuls(place, stream, dist_t, epsilon, &tmp2); tmp2.Resize({1, label_dim}); LabelSmoothAddBroadCast(place, stream, &tmp, &tmp2, out_t); } else { - Tensor tmp; + phi::DenseTensor tmp; LabelSmoothMuls(place, stream, in_t, (1 - epsilon), &tmp); LabelSmoothAdds(place, stream, &tmp, (epsilon / label_dim), out_t); } diff --git a/paddle/fluid/operators/layer_norm_kernel.cu.h b/paddle/fluid/operators/layer_norm_kernel.cu.h index 3d1bd7490795d..703a3b7506efc 100644 --- a/paddle/fluid/operators/layer_norm_kernel.cu.h +++ b/paddle/fluid/operators/layer_norm_kernel.cu.h @@ -33,7 +33,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 1081df4166aac..461d77f324bcf 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; class LayerNormOp : public framework::OperatorWithKernel { @@ -210,9 +209,9 @@ class LayerNormGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_NOT_NULL( var, platform::errors::NotFound("Y@GRAD of LayerNorm Op is not found.")); - const Tensor *t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } diff --git a/paddle/fluid/operators/layer_norm_op_mlu.cc b/paddle/fluid/operators/layer_norm_op_mlu.cc index 7058f9f094923..deb7bb5045eba 100644 --- a/paddle/fluid/operators/layer_norm_op_mlu.cc +++ b/paddle/fluid/operators/layer_norm_op_mlu.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; template @@ -72,7 +71,7 @@ class LayerNormMLUKernel : public framework::OpKernel { GetBasePtr(mean), GetBasePtr(variance)); } else { - Tensor tmp_scale(x->dtype()); + phi::DenseTensor tmp_scale(x->dtype()); if (!scale) { tmp_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); FillMLUTensorWithHostValue(ctx, static_cast(1), &tmp_scale); @@ -80,7 +79,7 @@ class LayerNormMLUKernel : public framework::OpKernel { tmp_scale = *scale; } - Tensor tmp_bias(x->dtype()); + phi::DenseTensor tmp_bias(x->dtype()); if (!bias) { tmp_bias.mutable_data(phi::make_ddim(scale_bias_axes), place); FillMLUTensorWithHostValue(ctx, static_cast(0), &tmp_bias); @@ -95,7 +94,7 @@ class LayerNormMLUKernel : public framework::OpKernel { scale_bias_axes.size(), scale_bias_axes.data(), CNNL_DTYPE_HALF); cnnlCastDataType_t cast_type = GetCastDataType(VT::FP32, VT::FP16); - Tensor final_scale(x->dtype()); + phi::DenseTensor final_scale(x->dtype()); if (final_scale.dtype() == DataType::FLOAT16 && tmp_scale.dtype() == DataType::FLOAT32) { final_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); @@ -110,7 +109,7 @@ class LayerNormMLUKernel : public framework::OpKernel { final_scale = tmp_scale; } - Tensor final_bias(x->dtype()); + phi::DenseTensor final_bias(x->dtype()); if (final_bias.dtype() == DataType::FLOAT16 && tmp_bias.dtype() == DataType::FLOAT32) { final_bias.mutable_data(phi::make_ddim(scale_bias_axes), place); @@ -181,7 +180,7 @@ class LayerNormGradMLUKernel : public framework::OpKernel { mean_var_axes.size(), mean_var_axes.data(), ToCnnlDataType()); MLUCnnlTensorDesc dx_desc(*dx); - Tensor tmp_scale(x->dtype()); + phi::DenseTensor tmp_scale(x->dtype()); if (!scale) { tmp_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); FillMLUTensorWithHostValue(ctx, static_cast(1), &tmp_scale); @@ -196,7 +195,7 @@ class LayerNormGradMLUKernel : public framework::OpKernel { cnnlCastDataType_t cast_fp32_to_fp16 = GetCastDataType(VT::FP32, VT::FP16); cnnlCastDataType_t cast_fp16_to_fp32 = GetCastDataType(VT::FP16, VT::FP32); - Tensor final_scale(x->dtype()); + phi::DenseTensor final_scale(x->dtype()); if (final_scale.dtype() == DataType::FLOAT16 && tmp_scale.dtype() == DataType::FLOAT32) { final_scale.mutable_data(phi::make_ddim(scale_bias_axes), place); @@ -211,14 +210,14 @@ class LayerNormGradMLUKernel : public framework::OpKernel { final_scale = tmp_scale; } - Tensor tmp_dscale(x->dtype()); + phi::DenseTensor tmp_dscale(x->dtype()); if (dscale && (tmp_dscale.dtype() == dscale->dtype())) { dscale->mutable_data(place); tmp_dscale = *dscale; } else { tmp_dscale.mutable_data(phi::make_ddim(scale_bias_axes), place); } - Tensor tmp_dbias(x->dtype()); + phi::DenseTensor tmp_dbias(x->dtype()); if (dbias && (tmp_dbias.dtype() == dbias->dtype())) { dbias->mutable_data(place); tmp_dbias = *dbias; diff --git a/paddle/fluid/operators/layer_norm_op_npu.cc b/paddle/fluid/operators/layer_norm_op_npu.cc index f529bb651c042..5d0313a8f9404 100644 --- a/paddle/fluid/operators/layer_norm_op_npu.cc +++ b/paddle/fluid/operators/layer_norm_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; using DataLayout = phi::DataLayout; @@ -75,10 +74,10 @@ class LayerNormNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor default_scale(x->type()); + phi::DenseTensor default_scale(x->type()); if (!scale) { default_scale.mutable_data(phi::make_ddim(axes), place); - Tensor value(x->type()); + phi::DenseTensor value(x->type()); value.mutable_data({1}, place); FillNpuTensorWithConstant(&value, static_cast(1.0)); const auto& runner = @@ -89,10 +88,10 @@ class LayerNormNPUKernel : public framework::OpKernel { const_cast(scale)->Resize(phi::make_ddim(axes)); } - Tensor default_bias(x->type()); + phi::DenseTensor default_bias(x->type()); if (!bias) { default_bias.mutable_data(phi::make_ddim(axes), place); - Tensor value(x->type()); + phi::DenseTensor value(x->type()); value.mutable_data({1}, place); FillNpuTensorWithConstant(&value, static_cast(0)); const auto& runner = @@ -104,7 +103,7 @@ class LayerNormNPUKernel : public framework::OpKernel { } // cast scale from LayerNormParamType to T if needed - Tensor cast_scale(x->type()); + phi::DenseTensor cast_scale(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && framework::TransToProtoVarType(scale->dtype()) == @@ -124,7 +123,7 @@ class LayerNormNPUKernel : public framework::OpKernel { } // cast bias from LayerNormParamType to T if needed - Tensor cast_bias(x->type()); + phi::DenseTensor cast_bias(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && framework::TransToProtoVarType(bias->dtype()) == @@ -147,7 +146,7 @@ class LayerNormNPUKernel : public framework::OpKernel { // mean should be of U type phi::DenseTensor* tmp_mean = mean; - Tensor cast_mean(x->type()); + phi::DenseTensor cast_mean(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && (framework::TransToProtoVarType(scale->dtype()) == @@ -164,7 +163,7 @@ class LayerNormNPUKernel : public framework::OpKernel { // same for variance phi::DenseTensor* tmp_variance = variance; - Tensor cast_variance(x->type()); + phi::DenseTensor cast_variance(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && (framework::TransToProtoVarType(scale->dtype()) == @@ -273,10 +272,10 @@ class LayerNormGradNPUKernel : public framework::OpKernel { const_cast(variance)->Resize( phi::make_ddim({new_shape})); - Tensor default_scale(x->type()); + phi::DenseTensor default_scale(x->type()); if (!scale) { default_scale.mutable_data(phi::make_ddim(axes), place); - Tensor value(x->type()); + phi::DenseTensor value(x->type()); value.mutable_data({1}, place); FillNpuTensorWithConstant(&value, static_cast(1.0)); const auto& runner = @@ -288,7 +287,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { } // cast scale from LayerNormParamType to T if needed - Tensor cast_scale(x->type()); + phi::DenseTensor cast_scale(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && framework::TransToProtoVarType(scale->dtype()) == @@ -308,7 +307,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { } // cast mean from LayerNormParamType to T if needed - Tensor cast_mean(x->type()); + phi::DenseTensor cast_mean(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && framework::TransToProtoVarType(mean->dtype()) == @@ 
-328,7 +327,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { } // cast variance from LayerNormParamType to T if needed - Tensor cast_variance(x->type()); + phi::DenseTensor cast_variance(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && framework::TransToProtoVarType(variance->dtype()) == @@ -347,7 +346,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { cast_variance.ShareDataWith(*variance); } - Tensor dx_(dy->type()), dscale_(dy->type()), dbias_(dy->type()); + phi::DenseTensor dx_(dy->type()), dscale_(dy->type()), dbias_(dy->type()); dx = (dx == nullptr) ? &dx_ : dx; dscale = (dscale == nullptr) ? &dscale_ : dscale; dbias = (dbias == nullptr) ? &dbias_ : dbias; @@ -361,7 +360,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // dscale should be of U type phi::DenseTensor* tmp_dscale = dscale; - Tensor cast_dscale(x->type()); + phi::DenseTensor cast_dscale(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && (framework::TransToProtoVarType(mean->dtype()) == @@ -378,7 +377,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { // same for dbias phi::DenseTensor* tmp_dbias = dbias; - Tensor cast_dbias(x->type()); + phi::DenseTensor cast_dbias(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && (framework::TransToProtoVarType(mean->dtype()) == diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index d475eab967d78..2faf47538ffa5 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -26,8 +26,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template inline void ResizeToChannelFirst(const framework::ExecutionContext& context, const phi::DenseTensor* input, diff --git a/paddle/fluid/operators/limit_by_capacity_op.cu b/paddle/fluid/operators/limit_by_capacity_op.cu index 28ae524e0a4f9..d14e4c75425c9 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cu +++ b/paddle/fluid/operators/limit_by_capacity_op.cu @@ -28,8 +28,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template __global__ void limit_by_capacity_impl( const T* expc, T* cap, T* out, const int n_expert, const int n_worker) { diff --git a/paddle/fluid/operators/log_loss_op_npu.cc b/paddle/fluid/operators/log_loss_op_npu.cc index 47c6bef196be1..ed045fad4a95e 100644 --- a/paddle/fluid/operators/log_loss_op_npu.cc +++ b/paddle/fluid/operators/log_loss_op_npu.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void LogLossAdds(const platform::Place& place, const aclrtStream& stream, diff --git a/paddle/fluid/operators/log_loss_op_xpu.cc b/paddle/fluid/operators/log_loss_op_xpu.cc index 59e0c15678247..87e6d42e98ad5 100644 --- a/paddle/fluid/operators/log_loss_op_xpu.cc +++ b/paddle/fluid/operators/log_loss_op_xpu.cc @@ -17,8 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class LogLossXPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/lookup_table_dequant_op.h b/paddle/fluid/operators/lookup_table_dequant_op.h index 3f9ec485ce4f8..1c8001e371764 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.h +++ b/paddle/fluid/operators/lookup_table_dequant_op.h @@ -27,7 +27,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 1ba6d6e31ecdc..04153eecc3927 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -26,7 +26,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index e9369bcb475cc..f43fccb19e0b6 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -27,14 +27,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; constexpr int64_t kNoPadding = -1; template -static std::vector CopyIdsToVector(const Tensor &ids) { +static std::vector CopyIdsToVector(const phi::DenseTensor &ids) { auto numel = ids.numel(); const auto *src = ids.data(); std::vector ret(numel); @@ -51,7 +50,7 @@ static std::vector CopyIdsToVector(const Tensor &ids) { template struct LookupTableV2CPUFunctor { LookupTableV2CPUFunctor(const framework::ExecutionContext &context, - const Tensor *ids_t) + const phi::DenseTensor *ids_t) : context_(context), ids_t_(ids_t) {} template @@ -143,7 +142,7 @@ struct LookupTableV2CPUFunctor { private: const framework::ExecutionContext &context_; - const Tensor *ids_t_; + const phi::DenseTensor *ids_t_; }; template @@ -160,7 +159,7 @@ class LookupTableV2Kernel : public framework::OpKernel { template struct LookupTableV2GradCPUFunctor { LookupTableV2GradCPUFunctor(const framework::ExecutionContext &context, - const Tensor *ids_t) + const phi::DenseTensor *ids_t) : context_(context), ids_t_(ids_t) {} template @@ -267,7 +266,7 @@ struct LookupTableV2GradCPUFunctor { private: const framework::ExecutionContext &context_; - const Tensor *ids_t_; + const phi::DenseTensor *ids_t_; }; template diff --git a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc index de9864aeee6a1..c407d91e6b80d 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc @@ -17,8 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class LookupTableV2MLUKernel : public framework::OpKernel { public: @@ -84,7 +82,7 @@ class LookupTableV2GradMLUKernel : public framework::OpKernel { "Number of ids greater than int32_t::max , please check " "number of ids in LookupTableV2GradMLUKernel.")); - Tensor ids_int32(ids_t->dtype()); + phi::DenseTensor ids_int32(ids_t->dtype()); if (ids_t->dtype() != DataType::INT32) { ids_int32.mutable_data(ids_t->dims(), ctx.GetPlace()); MLUCnnlTensorDesc ids_desc(*ids_t); diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index d11ef440f8a3f..3dc94d49244b1 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; constexpr int64_t kNoPadding = -1; template @@ -53,16 +52,16 @@ class LookupTableV2NPUKernel : public framework::OpKernel { .AddOutput(*output_t); runner.Run(); } else { - Tensor tmp_table_t(table_t->type()); + phi::DenseTensor tmp_table_t(table_t->type()); tmp_table_t.mutable_data(table_t->dims(), ctx.GetPlace()); - Tensor index; + phi::DenseTensor index; index.mutable_data({1, 1}, ctx.GetPlace()); FillNpuTensorWithConstant(&index, static_cast(padding_idx)); auto updata_dim = phi::make_ddim({1, table_t->dims()[1]}); - Tensor update; + phi::DenseTensor update; update.mutable_data(updata_dim, ctx.GetPlace()); FillNpuTensorWithConstant(&update, static_cast(0)); update.Resize(updata_dim); @@ -109,7 +108,7 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel { int embedding_dim = table_grad_t->dims()[1]; if (embedding_dim % 32 == 0) { - // NOTE(pangyoki): The embedding_dim of Tensor used in + // NOTE(pangyoki): The embedding_dim of phi::DenseTensor used in // EmbeddingDenseGrad must be an integer multiple of 32. int num_weights = table_grad_t->dims()[0]; const auto &runner = @@ -137,7 +136,7 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel { {{"use_locking", true}}); runner_scatter.Run(stream); } else { - Tensor casted_ids_t; + phi::DenseTensor casted_ids_t; if (framework::TransToProtoVarType(ids_t->dtype()) != framework::proto::VarType::INT32) { casted_ids_t.mutable_data(ids_t->dims(), ctx.GetPlace()); diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index aa2596e6a22ba..b772aa82e9d7e 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -46,8 +46,6 @@ struct LRNFunctor { template class LRNKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; - // f(x) = x * ( k + alpha * SUM((x)^2) )^(-beta) // x represents inputs // f(x) represents outputs @@ -141,7 +139,6 @@ struct LRNGradFunctor { template class LRNGradKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& ctx) const override { const phi::DenseTensor& x = *ctx.Input("X"); const phi::DenseTensor& out = *ctx.Input("Out"); diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index dc4f2f1548612..d5ced3edd2add 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -24,8 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template inline void ReorderInitState(const DeviceContext& ctx, const phi::DenseTensor& src, @@ -74,9 +72,9 @@ class LSTMKernel : public framework::OpKernel { framework::DDim dims({in_dims[0], frame_size}); if (bias) { - Tensor b = *bias; + phi::DenseTensor b = *bias; b.Resize({bias->numel(), 1}); - Tensor gate_bias = b.Slice(0, 4 * frame_size); + phi::DenseTensor gate_bias = b.Slice(0, 4 * frame_size); phi::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -95,7 +93,7 @@ class LSTMKernel : public framework::OpKernel { lstm_value.check_og = nullptr; } lstm_value.prev_state_value = nullptr; - Tensor ordered_c0; + phi::DenseTensor ordered_c0; framework::Vector order(batch_gate->lod()[2]); @@ -134,10 +132,10 @@ class LSTMKernel : public framework::OpKernel { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); - Tensor gate_t = batch_gate->Slice(bstart, bend); - Tensor out_t = batch_hidden.Slice(bstart, bend); - Tensor cell_t = batch_cell.Slice(bstart, bend); - Tensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend); + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor out_t = batch_hidden.Slice(bstart, bend); + phi::DenseTensor cell_t = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend); int cur_batch_size = bend - bstart; @@ -160,7 +158,7 @@ class LSTMKernel : public framework::OpKernel { // Since the batch computing for LSTM reorders the input sequence // according to their length. The initialized hidden state also needs // to reorder. - Tensor ordered_h0; + phi::DenseTensor ordered_h0; ReorderInitState( device_ctx, *hidden_t0, order, &ordered_h0, true); blas.MatMul(ordered_h0, @@ -237,7 +235,7 @@ class LSTMGradKernel : public framework::OpKernel { // ordered_h0/c0 is the reordered hidden/cell initialization. // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. 
- Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; + phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; framework::Vector order(batch_gate->lod()[2]); if (c0) { @@ -328,24 +326,24 @@ class LSTMGradKernel : public framework::OpKernel { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); - Tensor gate = batch_gate->Slice(bstart, bend); - Tensor cell = batch_cell.Slice(bstart, bend); - Tensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); + phi::DenseTensor gate = batch_gate->Slice(bstart, bend); + phi::DenseTensor cell = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); lstm_value.gate_value = gate.data(); lstm_value.state_value = cell.data(); lstm_value.state_active_value = cell_pre_act.data(); - Tensor out_g = batch_hidden_g.Slice(bstart, bend); - Tensor gate_g = batch_gate_g.Slice(bstart, bend); - Tensor cell_g = batch_cell_g.Slice(bstart, bend); + phi::DenseTensor out_g = batch_hidden_g.Slice(bstart, bend); + phi::DenseTensor gate_g = batch_gate_g.Slice(bstart, bend); + phi::DenseTensor cell_g = batch_cell_g.Slice(bstart, bend); lstm_grad.state_grad = cell_g.data(); lstm_grad.gate_grad = gate_g.data(); lstm_grad.output_grad = out_g.data(); if (n > 0) { int bstart_pre = static_cast(batch_starts[n - 1]); - Tensor cell_pre = batch_cell.Slice(bstart_pre, bstart); - Tensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); + phi::DenseTensor cell_pre = batch_cell.Slice(bstart_pre, bstart); + phi::DenseTensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); lstm_value.prev_state_value = cell_pre.data(); lstm_grad.prev_state_grad = cell_pre_g.data(); } else { @@ -424,9 +422,9 @@ class LSTMGradKernel : public framework::OpKernel { } if (bias && bias_g) { /* backward bias */ - Tensor b_g = *bias_g; + phi::DenseTensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); - Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); + phi::DenseTensor gate_bias_g = b_g.Slice(0, 4 * frame_size); phi::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 8056bf0bd49f2..c26a421966e7b 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -29,7 +29,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using platform::Transform; template { framework::DDim proj_dims({in_dims[0], proj_weight->dims()[1]}); if (bias) { - Tensor b = *bias; + phi::DenseTensor b = *bias; b.Resize({bias->numel(), 1}); - Tensor gate_bias = b.Slice(0, 4 * frame_size); + phi::DenseTensor gate_bias = b.Slice(0, 4 * frame_size); phi::funcs::RowwiseAdd add_bias; add_bias(device_ctx, *batch_gate, gate_bias, batch_gate); } @@ -156,8 +155,8 @@ class LSTMPKernel : public framework::OpKernel { lstmp_value.check_og = nullptr; } lstmp_value.prev_state_value = nullptr; - Tensor ordered_c0; - Tensor ordered_h0; + phi::DenseTensor ordered_c0; + phi::DenseTensor ordered_h0; framework::Vector order(batch_gate->lod()[2]); @@ -195,11 +194,11 @@ class LSTMPKernel : public framework::OpKernel { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); - Tensor gate_t = batch_gate->Slice(bstart, bend); - Tensor hidden_t = batch_hidden->Slice(bstart, bend); - Tensor proj_t = batch_proj.Slice(bstart, bend); - Tensor cell_t = batch_cell.Slice(bstart, bend); - Tensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend); + phi::DenseTensor gate_t = batch_gate->Slice(bstart, bend); + phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend); + phi::DenseTensor proj_t = batch_proj.Slice(bstart, bend); + phi::DenseTensor cell_t = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act_t = batch_cell_pre_act->Slice(bstart, bend); int cur_batch_size = bend - bstart; @@ -349,7 +348,7 @@ class LSTMPGradKernel : public framework::OpKernel { // ordered_h0/c0 is the reordered hidden/cell initialization. // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. - Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; + phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; framework::Vector order(batch_gate->lod()[2]); @@ -445,8 +444,8 @@ class LSTMPGradKernel : public framework::OpKernel { int bstart = static_cast(batch_starts[n]); int bend = static_cast(batch_starts[n + 1]); - Tensor cur_proj = batch_proj.Slice(bstart, bend); - Tensor proj_g = batch_proj_g.Slice(bstart, bend); + phi::DenseTensor cur_proj = batch_proj.Slice(bstart, bend); + phi::DenseTensor proj_g = batch_proj_g.Slice(bstart, bend); if (proj_clip && proj_clip > 0.0) { T* dx_data = proj_g.data(); @@ -472,7 +471,7 @@ class LSTMPGradKernel : public framework::OpKernel { proj_g_dev); } /* hidden state backwarad */ - Tensor out_g = batch_hidden_g.Slice(bstart, bend); + phi::DenseTensor out_g = batch_hidden_g.Slice(bstart, bend); blas.MatMul(proj_g, false, *proj_weight, @@ -482,7 +481,7 @@ class LSTMPGradKernel : public framework::OpKernel { static_cast(0.0)); /* projection weight backward*/ if (proj_weight_g) { - Tensor hidden_t = batch_hidden->Slice(bstart, bend); + phi::DenseTensor hidden_t = batch_hidden->Slice(bstart, bend); blas.MatMul(hidden_t, true, proj_g, @@ -492,23 +491,23 @@ class LSTMPGradKernel : public framework::OpKernel { static_cast(1.0)); } - Tensor gate = batch_gate->Slice(bstart, bend); - Tensor cell = batch_cell.Slice(bstart, bend); - Tensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); + phi::DenseTensor gate = batch_gate->Slice(bstart, bend); + phi::DenseTensor cell = batch_cell.Slice(bstart, bend); + phi::DenseTensor cell_pre_act = batch_cell_pre_act->Slice(bstart, bend); lstmp_value.gate_value = gate.data(); lstmp_value.state_value = cell.data(); lstmp_value.state_active_value = 
cell_pre_act.data(); - Tensor gate_g = batch_gate_g.Slice(bstart, bend); - Tensor cell_g = batch_cell_g.Slice(bstart, bend); + phi::DenseTensor gate_g = batch_gate_g.Slice(bstart, bend); + phi::DenseTensor cell_g = batch_cell_g.Slice(bstart, bend); lstmp_grad.state_grad = cell_g.data(); lstmp_grad.gate_grad = gate_g.data(); lstmp_grad.output_grad = out_g.data(); if (n > 0) { int bstart_pre = static_cast(batch_starts[n - 1]); - Tensor cell_pre = batch_cell.Slice(bstart_pre, bstart); - Tensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); + phi::DenseTensor cell_pre = batch_cell.Slice(bstart_pre, bstart); + phi::DenseTensor cell_pre_g = batch_cell_g.Slice(bstart_pre, bstart); lstmp_value.prev_state_value = cell_pre.data(); lstmp_grad.prev_state_grad = cell_pre_g.data(); } else { @@ -589,9 +588,9 @@ class LSTMPGradKernel : public framework::OpKernel { } if (bias && bias_g) { /* backward bias */ - Tensor b_g = *bias_g; + phi::DenseTensor b_g = *bias_g; b_g.Resize({bias_g->numel(), 1}); - Tensor gate_bias_g = b_g.Slice(0, 4 * frame_size); + phi::DenseTensor gate_bias_g = b_g.Slice(0, 4 * frame_size); phi::funcs::ColwiseSum col_sum; col_sum(device_ctx, batch_gate_g, &gate_bias_g); } diff --git a/paddle/fluid/operators/masked_select_op_mlu.cc b/paddle/fluid/operators/masked_select_op_mlu.cc index 50c9973721836..86e4029512b07 100644 --- a/paddle/fluid/operators/masked_select_op_mlu.cc +++ b/paddle/fluid/operators/masked_select_op_mlu.cc @@ -39,7 +39,7 @@ class MaskedSelectedMLUKernel : public framework::OpKernel { input_dim, mask_dim)); - Tensor number(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor number(framework::TransToPhiDataType(VT::INT32)); void* number_ptr = number.mutable_data({1}, ctx.GetPlace()); out->Resize(mask->dims()); @@ -72,7 +72,7 @@ class MaskedSelectedGradMLUKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - Tensor mask_int32, out_size; + phi::DenseTensor mask_int32, out_size; std::vector out_size_vec; mask_int32.mutable_data(mask->dims(), ctx.GetPlace()); out_size.mutable_data({1}, ctx.GetPlace()); @@ -118,10 +118,10 @@ class MaskedSelectedGradMLUKernel : public framework::OpKernel { paddle::framework::TensorToVector(out_size, dev_ctx, &out_size_vec); dev_ctx.Wait(); - Tensor mask_int32_tmp; + phi::DenseTensor mask_int32_tmp; mask_int32_tmp.ShareDataWith(mask_int32); mask_int32_tmp.Resize({mask_int32.numel()}); - Tensor topk_v2_out(framework::TransToPhiDataType(VT::INT32)), + phi::DenseTensor topk_v2_out(framework::TransToPhiDataType(VT::INT32)), indices_int32(framework::TransToPhiDataType(VT::INT32)); topk_v2_out.mutable_data({mask_int32.numel()}, ctx.GetPlace()); indices_int32.mutable_data({mask_int32.numel()}, ctx.GetPlace()); @@ -145,7 +145,7 @@ class MaskedSelectedGradMLUKernel : public framework::OpKernel { auto stream = ctx.template device_context().stream(); - Tensor indices_int32_out; + phi::DenseTensor indices_int32_out; indices_int32_out.mutable_data({out_size_vec[0]}, ctx.GetPlace()); memory::Copy(ctx.GetPlace(), GetBasePtr(&indices_int32_out), @@ -154,7 +154,7 @@ class MaskedSelectedGradMLUKernel : public framework::OpKernel { out_size_vec[0] * sizeof(int32_t), stream); - Tensor y_grad_tmp_out; + phi::DenseTensor y_grad_tmp_out; y_grad_tmp_out.mutable_data({out_size_vec[0]}, ctx.GetPlace()); MLUCnnlTensorDesc y_grad_tmp_out_desc(y_grad_tmp_out); memory::Copy(ctx.GetPlace(), @@ -164,7 +164,7 @@ class MaskedSelectedGradMLUKernel : public framework::OpKernel { out_size_vec[0] * sizeof(T), stream); - Tensor 
indices_int32_tmp; + phi::DenseTensor indices_int32_tmp; indices_int32_tmp.ShareDataWith(indices_int32_out); indices_int32_tmp.Resize({out_size_vec[0], 1}); MLUCnnlTensorDesc indices_int32_tmp_desc(indices_int32_tmp); diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index facf44725e2b6..3473a051b7324 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoD = framework::LoD; void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { @@ -353,7 +352,7 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel { auto* d_x = ctx.Output(framework::GradVarName("X")); auto* d_y = ctx.Output(framework::GradVarName("Y")); - Tensor tmp_grad; + phi::DenseTensor tmp_grad; tmp_grad.Resize(tmp->dims()); auto* d_tmp_data = tmp_grad.mutable_data(ctx.GetPlace()); auto* top_diff = d_out->data(); diff --git a/paddle/fluid/operators/match_matrix_tensor_op.h b/paddle/fluid/operators/match_matrix_tensor_op.h index 72e99222ddffb..6aa5b12ff6778 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.h +++ b/paddle/fluid/operators/match_matrix_tensor_op.h @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class MatchMatrixTensorOP : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 0038b25fb42de..0b6dc510f477f 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -26,8 +26,6 @@ namespace operators { namespace math { -using Tensor = phi::DenseTensor; - /* * \brief Context projection concatenates features in adjacent time-steps in * a sequence. The i-th row of the output is the concatenation of @@ -117,13 +115,13 @@ class ContextProjectFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + phi::DenseTensor out_t = col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); if (input_row_begin < input_row_end) { - Tensor in_t = in.Slice(input_row_begin, input_row_end); + phi::DenseTensor in_t = in.Slice(input_row_begin, input_row_end); std::vector output_shape( {sequence_height, @@ -151,8 +149,9 @@ class ContextProjectFunctor { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { if (lod_level_0[i] == lod_level_0[i + 1]) continue; - Tensor out_t = col->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + phi::DenseTensor out_t = + col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); @@ -167,9 +166,9 @@ class ContextProjectFunctor { for (int k = 0; k < padding_rows; ++k) { int padding_size = k + context_length < up_pad ? 
context_length : up_pad - k; - Tensor out_t_sub = out_t.Slice(k * context_length, - k * context_length + padding_size); - Tensor w_sub = padding_data->Slice(k, k + padding_size); + phi::DenseTensor out_t_sub = out_t.Slice( + k * context_length, k * context_length + padding_size); + phi::DenseTensor w_sub = padding_data->Slice(k, k + padding_size); framework::TensorCopy( w_sub, context.GetPlace(), context, &out_t_sub); } @@ -196,10 +195,10 @@ class ContextProjectFunctor { if (padding_begin > 0 || sequence_height == context_start) padding_idx = padding_begin + t; - Tensor out_t_sub = out_t.Slice( + phi::DenseTensor out_t_sub = out_t.Slice( (down_pad_begin_row + t) * context_length - padding_size, (down_pad_begin_row + t) * context_length); - Tensor w_sub = padding_data->Slice( + phi::DenseTensor w_sub = padding_data->Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); framework::TensorCopy( w_sub, context.GetPlace(), context, &out_t_sub); @@ -250,13 +249,14 @@ class ContextProjectGradFunctor { : static_cast(lod_level_0[i]); input_row_end = static_cast(lod_level_0[i + 1]); - Tensor out_t = col->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + phi::DenseTensor out_t = + col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); if (input_row_begin < input_row_end) { - Tensor in_t = in.Slice(input_row_begin, input_row_end); + phi::DenseTensor in_t = in.Slice(input_row_begin, input_row_end); std::vector output_shape( {sequence_height, @@ -283,8 +283,9 @@ class ContextProjectGradFunctor { for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { if (lod_level_0[i] == lod_level_0[i + 1]) continue; - Tensor out_t = col->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); + phi::DenseTensor out_t = + col->Slice(static_cast(lod_level_0[i]), + static_cast(lod_level_0[i + 1])); sequence_height = static_cast(out_t.dims()[0]); out_t.Resize({static_cast(sequence_height) * context_length, @@ -297,9 +298,9 @@ class ContextProjectGradFunctor { for (int k = 0; k < padding_rows; ++k) { int padding_size = k + context_length < up_pad ? 
context_length : up_pad - k; - Tensor out_t_sub = out_t.Slice(k * context_length, - k * context_length + padding_size); - Tensor w_sub = padding_data->Slice(k, k + padding_size); + phi::DenseTensor out_t_sub = out_t.Slice( + k * context_length, k * context_length + padding_size); + phi::DenseTensor w_sub = padding_data->Slice(k, k + padding_size); blas.AXPY(w_sub.numel(), static_cast(1), out_t_sub.data(), @@ -329,10 +330,10 @@ class ContextProjectGradFunctor { if (padding_begin > 0 || sequence_height == context_start) padding_idx = padding_begin + t; - Tensor out_t_sub = out_t.Slice( + phi::DenseTensor out_t_sub = out_t.Slice( (down_pad_begin_row + t) * context_length - padding_size, (down_pad_begin_row + t) * context_length); - Tensor w_sub = padding_data->Slice( + phi::DenseTensor w_sub = padding_data->Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); blas.AXPY(w_sub.numel(), static_cast(1), diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 1ba2d8f18ca1c..f4198acfd830c 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -55,9 +55,9 @@ static void CheckEighResult(const int batch, const int info) { template struct MatrixEighFunctor { void operator()(const framework::ExecutionContext &ctx, - const Tensor &input, - Tensor *eigen_values, - Tensor *eigen_vectors, + const phi::DenseTensor &input, + phi::DenseTensor *eigen_values, + phi::DenseTensor *eigen_vectors, bool is_lower, bool has_vectors); }; @@ -69,9 +69,9 @@ template struct MatrixEighFunctor { public: void operator()(const framework::ExecutionContext &ctx, - const Tensor &input, - Tensor *eigen_values, - Tensor *eigen_vectors, + const phi::DenseTensor &input, + phi::DenseTensor *eigen_values, + phi::DenseTensor *eigen_vectors, bool is_lower, bool has_vectors) { using ValueType = phi::dtype::Real; @@ -80,7 +80,7 @@ struct MatrixEighFunctor { auto dito = math::DeviceIndependenceTensorOperations(ctx); - Tensor input_trans; + phi::DenseTensor input_trans; // lapack is a column-major storge, transpose make the input to // have a continuous memory layout input_trans = dito.Transpose(input); @@ -124,7 +124,7 @@ struct MatrixEighFunctor { lwork = std::max(1, static_cast(lwork_opt)); liwork = std::max(1, iwork_opt); - Tensor rwork_tensor; + phi::DenseTensor rwork_tensor; ValueType *rwork_data = nullptr; // complex type @@ -134,7 +134,7 @@ struct MatrixEighFunctor { rwork_data = rwork_tensor.mutable_data( phi::make_ddim({lrwork}), ctx.GetPlace()); } - Tensor iwork_tensor, work_tensor; + phi::DenseTensor iwork_tensor, work_tensor; auto *iwork_data = iwork_tensor.mutable_data(phi::make_ddim({liwork}), ctx.GetPlace()); auto *work_data = @@ -179,9 +179,9 @@ template struct MatrixEighFunctor { public: void operator()(const framework::ExecutionContext &ctx, - const Tensor &input, - Tensor *eigen_values, - Tensor *eigen_vectors, + const phi::DenseTensor &input, + phi::DenseTensor *eigen_values, + phi::DenseTensor *eigen_vectors, bool is_lower, bool has_vectors) { using ValueType = phi::dtype::Real; @@ -190,7 +190,7 @@ struct MatrixEighFunctor { auto &dev_ctx = ctx.template device_context(); auto dito = math::DeviceIndependenceTensorOperations(ctx); - Tensor input_trans; + phi::DenseTensor input_trans; input_trans = dito.Transpose(input); auto *input_vector = input_trans.data(); auto &dims = input.dims(); diff --git a/paddle/fluid/operators/math/sample_prob.cu 
b/paddle/fluid/operators/math/sample_prob.cu index e3cc5a5741b02..0c6b49729546c 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -31,8 +31,6 @@ namespace paddle { namespace operators { namespace math { -using Tensor = phi::DenseTensor; - template __device__ T gpu_adjust_prob(const T prob, const int num_samples, @@ -146,7 +144,7 @@ void GPUSampleWithProb::operator()(const phi::GPUContext& context, int s_size = num_samples; framework::DDim s_dim{s_size}; - Tensor s; + phi::DenseTensor s; int64_t* s_data = s.mutable_data(s_dim, platform::CPUPlace()); math::LogUniformSampler sampler(dict_size, seed); diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h index 2464ac25186f0..7c60be6841552 100644 --- a/paddle/fluid/operators/math/sample_prob.h +++ b/paddle/fluid/operators/math/sample_prob.h @@ -27,8 +27,6 @@ namespace paddle { namespace operators { namespace math { -using Tensor = phi::DenseTensor; - /* UNDERSTAND: utility function to adjust probability for unique sampling, return whatever as it is if not using unique samping */ template diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 65d4a479a4988..53b3b632dd4be 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -24,7 +24,6 @@ namespace paddle { namespace operators { namespace math { -using Tensor = phi::DenseTensor; template @@ -405,7 +404,7 @@ class SequencePoolFunctor { } auto& place = *context.eigen_device(); for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { - Tensor out_t = output->Slice(i, i + 1); + phi::DenseTensor out_t = output->Slice(i, i + 1); int64_t w = input.numel() / input.dims()[0]; if (lod[i] == lod[i + 1]) { for (int j = 0; j < w; ++j) { @@ -413,7 +412,7 @@ class SequencePoolFunctor { } continue; } - Tensor in_t = + phi::DenseTensor in_t = input.Slice(static_cast(lod[i]), static_cast(lod[i + 1])); int64_t h = static_cast(lod[i + 1] - lod[i]); auto in_e = EigenMatrix::From(in_t, phi::make_ddim({h, w})); diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index c70e1e3e7405a..b7a9b9a19c970 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -23,7 +23,6 @@ namespace paddle { namespace operators { namespace math { -using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using DataLayout = platform::DataLayout; template @@ -42,7 +41,7 @@ void SoftmaxCUDNNFunctor::operator()( if (cudnn_tensor_dims.size() == 5) { layout = DataLayout::kNCDHW; } - // NOTE(*) : cudnn softmax only support >= 4D Tensor, + // NOTE(*) : cudnn softmax only support >= 4D phi::DenseTensor, // fill 1 at unused dims if (cudnn_tensor_dims.size() <= 2) { cudnn_tensor_dims.resize(4, 1); @@ -95,7 +94,7 @@ void SoftmaxGradCUDNNFunctor::operator()( if (cudnn_tensor_dims.size() == 5) { layout = DataLayout::kNCDHW; } - // NOTE(*) : cudnn softmax only support >= 4D Tensor, + // NOTE(*) : cudnn softmax only support >= 4D phi::DenseTensor, // fill 1 at unused dims if (cudnn_tensor_dims.size() <= 2) { cudnn_tensor_dims.resize(4, 1); diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu index 3b467448ac09d..22bdc48768dae 100644 --- a/paddle/fluid/operators/math/tree2col.cu +++ b/paddle/fluid/operators/math/tree2col.cu @@ -20,7 +20,6 @@ namespace paddle { namespace 
operators { namespace math { -using Tensor = phi::DenseTensor; using Node = paddle::operators::math::TreeNode; template __global__ void tree2col(const T* eta, @@ -65,7 +64,7 @@ class Tree2ColFunctor { auto feature_dims = node_features.dims(); phi::funcs::SetConstant constant; - Tensor EdgeSet_cpu; + phi::DenseTensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); int64_t feature_size = feature_dims[1]; size_t patch_elem_size = 3 * static_cast(feature_size); @@ -83,7 +82,7 @@ class Tree2ColFunctor { } size_t patch_size = processing_list.size(); - Tensor node_cpu, node_gpu, eta_cpu, eta_gpu, index_cpu, index_gpu; + phi::DenseTensor node_cpu, node_gpu, eta_cpu, eta_gpu, index_cpu, index_gpu; int* node = node_cpu.mutable_data({static_cast(total_size)}, cpu_place); T* eta = eta_cpu.mutable_data({static_cast(total_size * 3)}, @@ -142,7 +141,7 @@ class Col2TreeFunctor { auto output_dims = patch_grad.dims(); phi::funcs::SetConstant constant; - Tensor EdgeSet_cpu; + phi::DenseTensor EdgeSet_cpu; framework::TensorCopy(EdgeSet, cpu_place, &EdgeSet_cpu); int64_t output_size = output_dims[1]; size_t patch_elem_size = 3 * static_cast(output_size); @@ -168,7 +167,7 @@ class Col2TreeFunctor { total_size += tmp.size(); } - Tensor node_cpu, node_gpu, eta_cpu, eta_gpu, index_cpu, index_gpu; + phi::DenseTensor node_cpu, node_gpu, eta_cpu, eta_gpu, index_cpu, index_gpu; int* node = node_cpu.mutable_data({static_cast(total_size)}, cpu_place); T* eta = eta_cpu.mutable_data({static_cast(total_size * 3)}, diff --git a/paddle/fluid/operators/matmul_op_mlu.cc b/paddle/fluid/operators/matmul_op_mlu.cc index e55996903a7d1..84d2f031d4bcb 100644 --- a/paddle/fluid/operators/matmul_op_mlu.cc +++ b/paddle/fluid/operators/matmul_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template static void Mul(const framework::ExecutionContext& ctx, const phi::DenseTensor& X, @@ -183,7 +181,7 @@ class MatMulMLUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp; + phi::DenseTensor x_temp, y_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); if (x_ndim == 1) { @@ -281,7 +279,7 @@ class MatMulGradMLUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp, dout_temp; + phi::DenseTensor x_temp, y_temp, dout_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); dout_temp.ShareDataWith(*dOut); @@ -335,7 +333,7 @@ class MatMulGradMLUKernel : public framework::OpKernel { std::copy(y_dims.end() - 2, y_dims.end(), y_bcast_dims.end() - 2); if (dX) { - Tensor dx_temp(X->type()); + phi::DenseTensor dx_temp(X->type()); if (x_dims != x_bcast_dims) { dx_temp.Resize(phi::make_ddim(x_bcast_dims)); } else { @@ -356,7 +354,7 @@ class MatMulGradMLUKernel : public framework::OpKernel { } if (dY) { - Tensor dy_temp(Y->type()); + phi::DenseTensor dy_temp(Y->type()); if (y_dims != y_bcast_dims) { dy_temp.Resize(phi::make_ddim(y_bcast_dims)); } else { diff --git a/paddle/fluid/operators/matmul_op_npu.cc b/paddle/fluid/operators/matmul_op_npu.cc index 31b352b90f6a8..8ab395e8aa3e4 100644 --- a/paddle/fluid/operators/matmul_op_npu.cc +++ b/paddle/fluid/operators/matmul_op_npu.cc @@ -19,7 +19,6 @@ limitations under the License. 
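Every hunk in this stretch of the patch applies the same mechanical rule: the file-local alias (using Tensor = phi::DenseTensor;) is deleted and each declaration spells out the full type. A minimal sketch of the resulting style for a scratch buffer, with a hypothetical helper name (MakeTempBuffer is not a Paddle function), assuming only the public phi::DenseTensor API:

#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

// Hypothetical helper showing the post-patch spelling of a temporary buffer.
phi::DenseTensor MakeTempBuffer(int64_t rows, int64_t cols) {
  phi::DenseTensor temp;                      // was: Tensor temp;
  temp.Resize(phi::make_ddim({rows, cols}));  // records the shape only; memory
                                              // is allocated later by
                                              // mutable_data(place)
  return temp;
}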
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -35,7 +34,7 @@ static void Mul(const framework::ExecutionContext& ctx, const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {*Out}, {}); runner_dx.Run(stream); } else { - Tensor Out_temp(Out->dtype()); + phi::DenseTensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& runner_dx = NpuOpRunner("Mul", {X, Y}, {Out_temp}, {}); runner_dx.Run(stream); @@ -59,7 +58,7 @@ static void Dot(const framework::ExecutionContext& ctx, const auto& runner = NpuOpRunner("Dot", {X, Y}, {*Out}); runner.Run(stream); } else { - Tensor Out_temp(Out->dtype()); + phi::DenseTensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("Dot", {X, Y}, {Out_temp}); out_temp_runner.Run(stream); @@ -89,7 +88,7 @@ static void MatMul2D(const framework::ExecutionContext& ctx, {{"transpose_x1", trans_x}, {"transpose_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->dtype()); + phi::DenseTensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("MatMul", @@ -123,7 +122,7 @@ static void MatMulND(const framework::ExecutionContext& ctx, {{"adj_x1", trans_x}, {"adj_x2", trans_y}}); runner.Run(stream); } else { - Tensor Out_temp(Out->dtype()); + phi::DenseTensor Out_temp(Out->dtype()); Out_temp.mutable_data(Out->dims(), ctx.GetPlace()); const auto& out_temp_runner = NpuOpRunner("BatchMatMul", @@ -200,7 +199,7 @@ class MatMulNPUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp; + phi::DenseTensor x_temp, y_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); if (x_ndim == 1) { @@ -268,7 +267,7 @@ class MatMulNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->dtype()); + phi::DenseTensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(phi::make_ddim(x_broadcast_dims)); @@ -283,7 +282,7 @@ class MatMulNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->dtype()); + phi::DenseTensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(phi::make_ddim(y_broadcast_dims)); @@ -332,7 +331,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { // Case 1: [K] x [K] = [1] if (x_ndim == 1 && y_ndim == 1) { - Tensor dout_temp(dOut->dtype()); + phi::DenseTensor dout_temp(dOut->dtype()); dout_temp.Resize(X->dims()); dout_temp.mutable_data(ctx.GetPlace()); NpuOpRunner runner; @@ -352,7 +351,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp, dout_temp; + phi::DenseTensor x_temp, y_temp, dout_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); dout_temp.ShareDataWith(*dOut); @@ -434,7 +433,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->dtype()); + phi::DenseTensor x_temp_brd(X->dtype()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(phi::make_ddim(x_broadcast_dims)); @@ -449,7 +448,7 @@ class MatMulGradNPUKernel : 
public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->dtype()); + phi::DenseTensor y_temp_brd(Y->dtype()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(phi::make_ddim(y_broadcast_dims)); @@ -480,7 +479,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { alpha); } } else { - Tensor dx_temp(X->dtype()); + phi::DenseTensor dx_temp(X->dtype()); dx_temp.Resize(phi::make_ddim(x_broadcast_dims)); if (transpose_x) { MatMulND(ctx, @@ -520,7 +519,7 @@ class MatMulGradNPUKernel : public framework::OpKernel { alpha); } } else { - Tensor dy_temp(Y->dtype()); + phi::DenseTensor dy_temp(Y->dtype()); dy_temp.Resize(phi::make_ddim(y_broadcast_dims)); if (transpose_y) { MatMulND(ctx, diff --git a/paddle/fluid/operators/matmul_v2_op_mlu.cc b/paddle/fluid/operators/matmul_v2_op_mlu.cc index 134819b7920a0..db7a92409bf6c 100644 --- a/paddle/fluid/operators/matmul_v2_op_mlu.cc +++ b/paddle/fluid/operators/matmul_v2_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template static void Mul(const framework::ExecutionContext& ctx, const phi::DenseTensor& X, @@ -193,7 +191,7 @@ class MatMulV2MLUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp; + phi::DenseTensor x_temp, y_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); if (x_ndim == 1) { @@ -290,7 +288,7 @@ class MatMulGradV2MLUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp, dout_temp; + phi::DenseTensor x_temp, y_temp, dout_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); dout_temp.ShareDataWith(*dOut); @@ -344,7 +342,7 @@ class MatMulGradV2MLUKernel : public framework::OpKernel { std::copy(y_dims.end() - 2, y_dims.end(), y_bcast_dims.end() - 2); if (dX) { - Tensor dx_temp(X->type()); + phi::DenseTensor dx_temp(X->type()); if (x_dims != x_bcast_dims) { dx_temp.Resize(phi::make_ddim(x_bcast_dims)); } else { @@ -375,7 +373,7 @@ class MatMulGradV2MLUKernel : public framework::OpKernel { ctx, x_temp, dout_temp, dY, !trans_x, false); } } else { - Tensor dy_temp(Y->type()); + phi::DenseTensor dy_temp(Y->type()); if (y_dims != y_bcast_dims) { dy_temp.Resize(phi::make_ddim(y_bcast_dims)); } else { diff --git a/paddle/fluid/operators/matmul_v2_op_npu.cc b/paddle/fluid/operators/matmul_v2_op_npu.cc index 4df3de71134ed..715171452a987 100644 --- a/paddle/fluid/operators/matmul_v2_op_npu.cc +++ b/paddle/fluid/operators/matmul_v2_op_npu.cc @@ -21,7 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -67,7 +66,7 @@ void MatMulND(const framework::ExecutionContext& ctx, const bool trans_x, const bool trans_y) { Out->mutable_data(ctx.GetPlace()); - Tensor x_fp32, y_fp32, out_fp32; + phi::DenseTensor x_fp32, y_fp32, out_fp32; x_fp32.Resize(X.dims()); y_fp32.Resize(Y.dims()); out_fp32.Resize(Out->dims()); @@ -173,7 +172,7 @@ class MatMulV2NPUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp; + phi::DenseTensor x_temp, y_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); if (x_ndim == 1) { @@ -239,7 +238,7 @@ class MatMulV2NPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + phi::DenseTensor x_temp_brd(X->type()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(phi::make_ddim(x_broadcast_dims)); @@ -254,7 +253,7 @@ class MatMulV2NPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + phi::DenseTensor y_temp_brd(Y->type()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(phi::make_ddim(y_broadcast_dims)); @@ -295,7 +294,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { // Case 1: [K] x [K] = [1] if (x_ndim == 1 && y_ndim == 1) { - Tensor dout_temp(dOut->type()); + phi::DenseTensor dout_temp(dOut->type()); dout_temp.Resize(X->dims()); dout_temp.mutable_data(ctx.GetPlace()); NpuOpRunner runner; @@ -319,7 +318,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { } // Resize dim 1 to 2 - Tensor x_temp, y_temp, dout_temp; + phi::DenseTensor x_temp, y_temp, dout_temp; x_temp.ShareDataWith(*X); y_temp.ShareDataWith(*Y); dout_temp.ShareDataWith(*dOut); @@ -396,7 +395,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { std::copy(x_dims.end() - 2, x_dims.end(), x_broadcast_dims.end() - 2); std::copy(y_dims.end() - 2, y_dims.end(), y_broadcast_dims.end() - 2); - Tensor x_temp_brd(X->type()); + phi::DenseTensor x_temp_brd(X->type()); if (x_dims == x_broadcast_dims) { x_temp_brd.ShareDataWith(*X); x_temp_brd.Resize(phi::make_ddim(x_broadcast_dims)); @@ -411,7 +410,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { .Run(stream); } - Tensor y_temp_brd(Y->type()); + phi::DenseTensor y_temp_brd(Y->type()); if (y_dims == y_broadcast_dims) { y_temp_brd.ShareDataWith(*Y); y_temp_brd.Resize(phi::make_ddim(y_broadcast_dims)); @@ -434,7 +433,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { MatMulND(ctx, stream, dout_temp, y_temp_brd, dX, false, !trans_y); } } else { - Tensor dx_temp(X->type()); + phi::DenseTensor dx_temp(X->type()); dx_temp.Resize(phi::make_ddim(x_broadcast_dims)); if (trans_x) { MatMulND( @@ -454,7 +453,7 @@ class MatMulV2GradNPUKernel : public framework::OpKernel { MatMulND(ctx, stream, x_temp_brd, dout_temp, dY, !trans_x, false); } } else { - Tensor dy_temp(Y->type()); + phi::DenseTensor dy_temp(Y->type()); dy_temp.Resize(phi::make_ddim(y_broadcast_dims)); if (trans_y) { MatMulND( diff --git a/paddle/fluid/operators/mean_iou_op.h b/paddle/fluid/operators/mean_iou_op.h index 7681af011e663..9be97f5ba958e 100644 --- a/paddle/fluid/operators/mean_iou_op.h +++ b/paddle/fluid/operators/mean_iou_op.h @@ -20,7 +20,6 @@ limitations under the License. 
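The matmul gradient kernels above all stage their intermediate results in dtype-tagged temporaries before broadcasting or reducing them into the real outputs. A condensed sketch of that staging step, using the same calls that appear in the hunks; the function name StageGrad and its parameters are hypothetical:

#include <cstdint>
#include <vector>

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

// Stage a gradient in a temporary that carries the source dtype, mirroring
// the dx_temp / dy_temp / dout_temp temporaries in the kernels above.
template <typename T>
phi::DenseTensor StageGrad(const phi::DenseTensor& src,
                           const std::vector<int64_t>& broadcast_dims,
                           const phi::Place& place) {
  phi::DenseTensor tmp(src.type());            // was: Tensor tmp(src.type());
  tmp.Resize(phi::make_ddim(broadcast_dims));  // broadcast shape of the grad
  tmp.mutable_data<T>(place);                  // allocate on the target device
  return tmp;
}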
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template { auto out_correct_t = EigenTensor::From(*out_correct); // Tmp tensor - Tensor denominator; - Tensor valid_count; - Tensor iou_sum; + phi::DenseTensor denominator; + phi::DenseTensor valid_count; + phi::DenseTensor iou_sum; // get data ptr of tmp tensor int* denominator_data = denominator.mutable_data( diff --git a/paddle/fluid/operators/mean_op_mlu.cc b/paddle/fluid/operators/mean_op_mlu.cc index 8fea989941c88..e9266b30fcd01 100644 --- a/paddle/fluid/operators/mean_op_mlu.cc +++ b/paddle/fluid/operators/mean_op_mlu.cc @@ -20,8 +20,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class MeanMLUKernel : public framework::OpKernel { public: @@ -79,12 +77,13 @@ class MeanMLUGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto output_grad = context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(output_grad->numel(), - 1, - platform::errors::InvalidArgument( - "Mean Gradient Input Tensor len should be 1. But " - "received Out@Grad's elements num is %d.", - output_grad->numel())); + PADDLE_ENFORCE_EQ( + output_grad->numel(), + 1, + platform::errors::InvalidArgument( + "Mean Gradient Input phi::DenseTensor len should be 1. But " + "received Out@Grad's elements num is %d.", + output_grad->numel())); auto input_grad = context.Output(framework::GradVarName("X")); input_grad->mutable_data(context.GetPlace()); @@ -102,7 +101,7 @@ class MeanMLUGradKernel : public framework::OpKernel { } // means - Tensor mean_var(output_grad->dtype()); + phi::DenseTensor mean_var(output_grad->dtype()); mean_var.mutable_data(input_grad->dims(), context.GetPlace()); MLUCnnlTensorDesc mean_var_desc( mean_var, CNNL_LAYOUT_ARRAY, ToCnnlDataType(mean_var.dtype())); diff --git a/paddle/fluid/operators/mean_op_npu.cc b/paddle/fluid/operators/mean_op_npu.cc index 99fd77dd7f7df..3417045690ff6 100644 --- a/paddle/fluid/operators/mean_op_npu.cc +++ b/paddle/fluid/operators/mean_op_npu.cc @@ -16,8 +16,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class MeanNPUKernel : public framework::OpKernel { public: @@ -51,31 +49,32 @@ class MeanGradNPUKernel : public framework::OpKernel { auto grad = context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(grad->numel(), - 1, - platform::errors::InvalidArgument( - "Mean Gradient Input Tensor len should be 1. But " - "received Out@Grad's elements num is %d.", - grad->numel())); + PADDLE_ENFORCE_EQ( + grad->numel(), + 1, + platform::errors::InvalidArgument( + "Mean Gradient Input phi::DenseTensor len should be 1. 
But " + "received Out@Grad's elements num is %d.", + grad->numel())); auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); // ones - Tensor ones(grad->dtype()); + phi::DenseTensor ones(grad->dtype()); ones.mutable_data(IG->dims(), context.GetPlace()); const auto& runner_ones = NpuOpRunner("OnesLike", {*IG}, {ones}, {}); runner_ones.Run(stream); // means - Tensor mean_tensor(grad->dtype()); + phi::DenseTensor mean_tensor(grad->dtype()); mean_tensor.Resize({1}); mean_tensor.mutable_data(context.GetPlace()); FillNpuTensorWithConstant( &mean_tensor, static_cast(1.0 / static_cast(IG->numel()))); // means mul ones - Tensor mean_ma(grad->dtype()); + phi::DenseTensor mean_ma(grad->dtype()); mean_ma.Resize(IG->dims()); mean_ma.mutable_data(context.GetPlace()); const auto& runner_mul_1 = diff --git a/paddle/fluid/operators/meshgrid_op_mlu.cc b/paddle/fluid/operators/meshgrid_op_mlu.cc index 76beb021bc654..f0103afbb0bc5 100644 --- a/paddle/fluid/operators/meshgrid_op_mlu.cc +++ b/paddle/fluid/operators/meshgrid_op_mlu.cc @@ -24,12 +24,12 @@ class MeshgridMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput("Out"); - PADDLE_ENFORCE_EQ( - (ins.size() > 1) && (ins.size() < 7), - true, - platform::errors::InvalidArgument( - "Excepted Tensor numbers between 2 and 6, but only received d% .", - ins.size())); + PADDLE_ENFORCE_EQ((ins.size() > 1) && (ins.size() < 7), + true, + platform::errors::InvalidArgument( + "Excepted phi::DenseTensor numbers between 2 and 6, " + "but only received d% .", + ins.size())); int64_t size = ins.size(); std::vector shape(size); diff --git a/paddle/fluid/operators/metrics/accuracy_op_mlu.cc b/paddle/fluid/operators/metrics/accuracy_op_mlu.cc index ec78fb09eab30..b66966ac64b90 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_mlu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_mlu.cc @@ -36,8 +36,8 @@ class AccuracyMLUKernel : public framework::OpKernel { } // cast `indices` or `label` if their type is not INT32 - Tensor indices_int32(framework::TransToPhiDataType(VT::INT32)); - Tensor label_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor label_int32(framework::TransToPhiDataType(VT::INT32)); auto indices_type = framework::TransToProtoVarType(indices->type()); if (indices_type != VT::INT32) { PADDLE_ENFORCE_EQ(MLUSupportsCast(indices_type, VT::INT32), @@ -89,7 +89,7 @@ class AccuracyMLUKernel : public framework::OpKernel { // equal MLUCnnlTensorDesc indices_int32_desc(indices_int32); MLUCnnlTensorDesc label_int32_desc(label_int32); - Tensor equal_tensor(framework::TransToPhiDataType(VT::BOOL)); + phi::DenseTensor equal_tensor(framework::TransToPhiDataType(VT::BOOL)); equal_tensor.Resize(indices->dims()); equal_tensor.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc equal_tensor_desc(equal_tensor); @@ -103,7 +103,7 @@ class AccuracyMLUKernel : public framework::OpKernel { GetBasePtr(&equal_tensor)); // cast equal - Tensor equal_fp32(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor equal_fp32(framework::TransToPhiDataType(VT::FP32)); equal_fp32.Resize(indices->dims()); equal_fp32.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc equal_fp32_desc(equal_fp32); @@ -117,7 +117,7 @@ class AccuracyMLUKernel : public framework::OpKernel { // [correct] // reduce_max - Tensor 
correct_max(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor correct_max(framework::TransToPhiDataType(VT::FP32)); correct_max.Resize(phi::make_ddim({num_samples})); correct_max.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc correct_max_desc(correct_max); @@ -140,7 +140,7 @@ class AccuracyMLUKernel : public framework::OpKernel { GetBasePtr(&correct_max)); // reduce_sum - Tensor correct_sum(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor correct_sum(framework::TransToPhiDataType(VT::FP32)); correct_sum.Resize(correct->dims()); correct_sum.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc correct_sum_desc(correct_sum); @@ -183,7 +183,7 @@ class AccuracyMLUKernel : public framework::OpKernel { GetBasePtr(total)); // use `total` of type `float32` for calculating accuracy - Tensor total_fp32(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor total_fp32(framework::TransToPhiDataType(VT::FP32)); total_fp32.Resize(total->dims()); total_fp32.mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc total_fp32_desc(total_fp32); diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index 4c83071264a42..737228902b6e7 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class AccuracyXPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h index 55be510dcd237..bec8bba09ad1a 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.h +++ b/paddle/fluid/operators/metrics/precision_recall_op.h @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index d27234344ff27..146ee52fc62ff 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -26,7 +26,6 @@ namespace operators { using dnnl::memory; using dnnl::primitive; using dnnl::reorder; -using Tensor = phi::DenseTensor; using dnnl::stream; template diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index be965c4abb895..c2556b6bfc41d 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -20,16 +20,15 @@ limitations under the License. */ namespace { using dnnl::memory; using paddle::framework::ExecutionContext; +using paddle::framework::GradVarName; using paddle::platform::MatMulV2MKLDNNHandler; using phi::OneDNNContext; using phi::vectorize; using phi::funcs::OneDNNGetDataType; -using Tensor = phi::DenseTensor; -using paddle::framework::GradVarName; // Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Identity op if the tensor is not of rank 3. -static Tensor FoldOuterDims(const Tensor &input) { +static phi::DenseTensor FoldOuterDims(const phi::DenseTensor &input) { auto output = input; auto in_dims = input.dims(); if (in_dims.size() == 3) { @@ -42,14 +41,14 @@ static Tensor FoldOuterDims(const Tensor &input) { // (Warning: This requires transposing data and writes into new memory.) // Identity op if the tensor is not of rank 3. 
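For readability, this is the post-patch shape of the FoldOuterDims helper whose signature changes in the hunk above. The body is reconstructed from the surrounding comment ("P x M x N to (P * M) x N"), so read it as a sketch rather than a verbatim quote of the committed code:

#include "paddle/phi/core/dense_tensor.h"

// Collapse a rank-3 tensor from P x M x N to (P * M) x N; identity otherwise.
static phi::DenseTensor FoldOuterDims(const phi::DenseTensor& input) {
  auto output = input;            // copies the metadata, shares the storage
  auto in_dims = input.dims();
  if (in_dims.size() == 3) {
    output.Resize({in_dims[0] * in_dims[1], in_dims[2]});
  }
  return output;
}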
template -static Tensor FoldFirstAndLastDims(const OneDNNContext &dev_ctx, - const Tensor *input) { +static phi::DenseTensor FoldFirstAndLastDims(const OneDNNContext &dev_ctx, + const phi::DenseTensor *input) { auto input_dims = vectorize(input->dims()); if (input_dims.size() != 3) { return *input; } - Tensor output; + phi::DenseTensor output; output.Resize({input_dims[1], input_dims[0], input_dims[2]}); auto output_dims = vectorize(output.dims()); @@ -89,11 +88,11 @@ class MatMulMKLDNNHandler public: MatMulMKLDNNHandler(const dnnl::engine engine, paddle::platform::Place cpu_place, - Tensor *x, + phi::DenseTensor *x, bool trans_x, - Tensor *y, + phi::DenseTensor *y, bool trans_y, - Tensor *out, + phi::DenseTensor *out, float scale) : phi::funcs::OneDNNHandlerNoCachingT(engine, cpu_place) { @@ -129,7 +128,7 @@ class MatMulMKLDNNHandler this->AcquireForwardPrimitiveDescriptor(attrs, x_md, y_md, out_md); } - std::shared_ptr AcquireWeightsMemory(const Tensor *input) { + std::shared_ptr AcquireWeightsMemory(const phi::DenseTensor *input) { const YT *input_data = input->data(); return this->AcquireMemoryFromPrimitive( this->fwd_pd_->weights_desc(), @@ -176,11 +175,10 @@ class MatMulMKLDNNHandler // We cannot use base AcquireDstMemory as it makes an allocation request // base on DST memory primitive size. This is fine in general, but in MatMul // we have primitive that covers only one batch of Data and then shift - // pointer for every new batch. Hence Tensor size is bigger that dst memory - // primitive size. So would we request less memory that is there and it - // triggers an - // assertion. So as there is no 'any' format here we can leave default size - // of Tensor as computed in ComputeInferShape + // pointer for every new batch. Hence phi::DenseTensor size is bigger that + // dst memory primitive size. So would we request less memory that is there + // and it triggers an assertion. So as there is no 'any' format here we can + // leave default size of phi::DenseTensor as computed in ComputeInferShape OT *ptr = output->mutable_data(this->place_); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); } @@ -199,7 +197,7 @@ class MatMulMKLDNNHandler * If transposed, `H,W` will be swapped. */ static void ReshapeTensorToMatrixSequence( - Tensor *x, const phi::funcs::MatDescriptor &descriptor) { + phi::DenseTensor *x, const phi::funcs::MatDescriptor &descriptor) { int64_t h, w; h = descriptor.height_; w = descriptor.width_; @@ -227,8 +225,11 @@ static void ReshapeTensorToMatrixSequence( * If any of `X` and `Y` has batch size BatchSize, the out will have the * BatchSize. 
*/ -static void ReshapeXYOutToMatrixSequence( - Tensor *x, Tensor *y, Tensor *out, bool trans_x, bool trans_y) { +static void ReshapeXYOutToMatrixSequence(phi::DenseTensor *x, + phi::DenseTensor *y, + phi::DenseTensor *out, + bool trans_x, + bool trans_y) { auto x_dim = phi::funcs::RowMatrixDimsFromVector(x->dims()); auto y_dim = phi::funcs::ColumnMatrixDimsFromVector(y->dims()); auto mat_dim_x = phi::funcs::CreateMatrixDescriptor(x_dim, 0, trans_x); @@ -326,13 +327,13 @@ bool IsOutputFused(const ExecutionContext &ctx) { template void ExecuteMatMulV2(const ExecutionContext &ctx, const dnnl::engine onednn_engine, - const Tensor *x, + const phi::DenseTensor *x, const std::vector &x_dims, bool trans_x, - const Tensor *y, + const phi::DenseTensor *y, const std::vector &y_dims, bool trans_y, - Tensor *out) { + phi::DenseTensor *out) { std::vector x_strides_override = GetInputStrides(ctx, "X"); std::vector y_strides_override = GetInputStrides(ctx, "Y"); MatMulV2MKLDNNHandler handler(ctx, @@ -471,7 +472,7 @@ class MatMulMKLDNNKernel : public paddle::framework::OpKernel { const std::vector &y_dims, std::vector *x_bd_dims, std::vector *y_bd_dims, - Tensor *out) const { + phi::DenseTensor *out) const { if (x_dims.size() == 1) { (*x_bd_dims)[(*x_bd_dims).size() - 1] = x_dims[0]; } else if (x_dims.size() == 2) { @@ -501,7 +502,7 @@ class MatMulMKLDNNKernel : public paddle::framework::OpKernel { (*y_bd_dims)[i] == 1, true, paddle::platform::errors::InvalidArgument( - "Tensor dimensions are incorrect for broadcasting." + "phi::DenseTensor dimensions are incorrect for broadcasting." "Dimensions in X and Y must be same or equal to 1, but " "received x_dim[%d]=%d and y_dims[%d]= %d", i, @@ -649,7 +650,7 @@ class MatMulGradMKLDNNKernel : public paddle::framework::OpKernel { bool need_combine = (x->dims().size() == 3 || y->dims().size() == 3) && out->dims().size() == 2; - Tensor x_combined, y_combined; + phi::DenseTensor x_combined, y_combined; if (!need_combine) { x_combined = *x; y_combined = *y; diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 098623ea52466..c23f247c9d212 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -25,7 +25,6 @@ namespace operators { using dnnl::memory; using dnnl::primitive; using dnnl::reorder; -using Tensor = phi::DenseTensor; using dnnl::stream; using phi::DataLayout; diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 36498e60f4e54..a9408ad38e3a1 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -25,7 +25,6 @@ namespace operators { using dnnl::memory; using dnnl::reorder; -using Tensor = phi::DenseTensor; namespace { diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index ff2484c7ced38..0c2b439b3e510 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -40,7 +40,7 @@ static std::vector extract_shape( tensor->dims(), phi::make_ddim({1}), platform::errors::InvalidArgument( - "If the element type of 'shape' in ReshapeOp is Tensor, " + "If the element type of 'shape' in ReshapeOp is phi::DenseTensor, " "the element's shape must be [1]. 
But received the element's shape " "is [%s]", tensor->dims())); diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index 2c5b269c3923b..077107dca68f3 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -21,7 +21,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using phi::DataLayout; using phi::OneDNNContext; @@ -37,8 +36,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { const auto& dnnl_engine = dev_ctx.GetEngine(); std::vector transpose_axis = ctx.Attr>("axis"); int ndims = transpose_axis.size(); - const phi::DenseTensor* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const phi::DenseTensor* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto& astream = OneDNNContext::tls().get_stream(); @@ -122,8 +121,9 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { paddle::platform::errors::PreconditionNotMet( "Operator DNNL TransposeGrad must use CPUPlace")); - const auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + const auto* dout = + ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); if (!dx) return; auto& dev_ctx = ctx.template device_context(); const auto& dnnl_engine = dev_ctx.GetEngine(); diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index d205bc2b2554d..09b1551086fab 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -386,7 +386,7 @@ MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim, mluOpSetTensorDescriptorPosition(raw_tensor_desc, position)); } -MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, +MLUOpTensorDesc::MLUOpTensorDesc(const phi::DenseTensor& tensor, const mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype) { auto dims = phi::vectorize(tensor.dims()); @@ -407,11 +407,11 @@ MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, } } -MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor) +MLUOpTensorDesc::MLUOpTensorDesc(const phi::DenseTensor& tensor) : MLUOpTensorDesc( tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {} -MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, +MLUOpTensorDesc::MLUOpTensorDesc(const phi::DenseTensor& tensor, mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype, int position) @@ -420,7 +420,7 @@ MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, mluOpSetTensorDescriptorPosition(raw_tensor_desc, position)); } -MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor, +MLUOpTensorDesc::MLUOpTensorDesc(const phi::DenseTensor& tensor, mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype, int position, @@ -562,7 +562,7 @@ const cnnlRandGenerator_t MLUCnnlRandomGeneratorDesc::get() const { return mlu_generator; } -Tensor& MLUCnnlRandomGeneratorDesc::get_state() { return mlu_state; } +phi::DenseTensor& MLUCnnlRandomGeneratorDesc::get_state() { return mlu_state; } MLUCnnlRandomGeneratorDesc::~MLUCnnlRandomGeneratorDesc() { if (mlu_generator) { @@ -953,7 +953,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetConcatWorkspaceSize(handle, pack_num, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = 
workspace.mutable_data(ctx.GetPlace()); @@ -981,7 +981,7 @@ MLURNNDesc::~MLURNNDesc() { PADDLE_ENFORCE_MLU_SUCCESS( cnnlGetConcatWorkspaceSize(handle, pack_num, &workspace_size)); - Tensor workspace(paddle::experimental::DataType::INT8); + phi::DenseTensor workspace(paddle::experimental::DataType::INT8); workspace.Resize(framework::DDim({static_cast(workspace_size)})); void* workspace_ptr = workspace.mutable_data(dev_ctx.GetPlace()); @@ -1011,7 +1011,7 @@ MLURNNDesc::~MLURNNDesc() { handle, in0_desc, in1_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1067,7 +1067,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_quant_desc, output_desc, local_size, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1104,7 +1104,7 @@ MLURNNDesc::~MLURNNDesc() { // use ctx allocate interface for profiling purpose auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1580,7 +1580,7 @@ MLURNNDesc::~MLURNNDesc() { handle, in0_desc, in1_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1634,7 +1634,7 @@ MLURNNDesc::~MLURNNDesc() { handle, a_desc, b_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1665,7 +1665,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetAxWorkspaceSize(handle, alpha_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1754,7 +1754,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1803,7 +1803,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetSplitWorkspaceSize(handle, split_num, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1831,7 +1831,7 @@ MLURNNDesc::~MLURNNDesc() { PADDLE_ENFORCE_MLU_SUCCESS( cnnlGetSplitWorkspaceSize(handle, split_num, &workspace_size)); - Tensor workspace(paddle::experimental::DataType::INT8); + phi::DenseTensor workspace(paddle::experimental::DataType::INT8); workspace.Resize(framework::DDim({static_cast(workspace_size)})); void* 
workspace_ptr = workspace.mutable_data(dev_ctx.GetPlace()); @@ -1947,7 +1947,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -1979,7 +1979,7 @@ MLURNNDesc::~MLURNNDesc() { handle, condition_desc, then_desc, else_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2023,7 +2023,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2162,7 +2162,7 @@ MLURNNDesc::~MLURNNDesc() { handle, pool_mode, output_w, output_h, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2217,7 +2217,7 @@ MLURNNDesc::~MLURNNDesc() { handle, pool_mode, output_shape[2], output_shape[1], &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2270,7 +2270,7 @@ MLURNNDesc::~MLURNNDesc() { handle, data_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2359,7 +2359,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2433,7 +2433,7 @@ MLURNNDesc::~MLURNNDesc() { size_t workspace_size = 0; void* workspace_ptr = nullptr; - Tensor workspace; + phi::DenseTensor workspace; if (need_workspace) { PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetReduceOpWorkspaceSize( handle, input_desc, output_desc, reduction_desc, &workspace_size)); @@ -2473,7 +2473,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2502,7 +2502,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2530,7 +2530,7 @@ MLURNNDesc::~MLURNNDesc() { 
cnnlGetMaximumWorkspaceSize(handle, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2558,7 +2558,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetMinimumWorkspaceSize(handle, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2587,7 +2587,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2617,7 +2617,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2647,7 +2647,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2677,7 +2677,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -2944,7 +2944,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetDynamicStitchWorkspaceSize(handle, size, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3203,7 +3203,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetNmsWorkspaceSize_v2(handle, confidence_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3252,7 +3252,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3427,7 +3427,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetLayerNormOpWorkspaceSize(handle, axis, x_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3499,7 +3499,7 @@ 
MLURNNDesc::~MLURNNDesc() { cnnlGetQuantizeParamWorkspaceSize(handle, input_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3565,7 +3565,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3708,7 +3708,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlMakeFusedOpsPlan(handle, fusion_plan, cparam_pack, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3773,7 +3773,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3839,7 +3839,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3899,7 +3899,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -3967,7 +3967,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4021,7 +4021,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4079,7 +4079,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4135,7 +4135,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4207,7 +4207,7 @@ MLURNNDesc::~MLURNNDesc() { handle, matmul_desc, a_desc, b_desc, output_desc, algo, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4295,7 
+4295,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4399,7 +4399,7 @@ MLURNNDesc::~MLURNNDesc() { &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4450,7 +4450,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, perm_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4517,7 +4517,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetWhereWorkspaceSize(handle, num_true_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4593,7 +4593,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input1_desc, input2_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4623,7 +4623,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlGetQRWorkspaceSize(handle, a_desc, some, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4667,7 +4667,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, weight_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4705,7 +4705,7 @@ MLURNNDesc::~MLURNNDesc() { handle, target_desc, weight_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4742,7 +4742,7 @@ MLURNNDesc::~MLURNNDesc() { handle, x_desc, algorithm, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4778,7 +4778,7 @@ MLURNNDesc::~MLURNNDesc() { handle, x_desc, algorithm, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4857,7 +4857,7 @@ MLURNNDesc::~MLURNNDesc() { handle, diff_desc, output_desc, scale_grad_by_freq, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = 
ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -4903,7 +4903,7 @@ MLURNNDesc::~MLURNNDesc() { "MLU RNNForward failed. x_desc initializing failed.")); auto& dev_ctx = GetDevCtxFromCTX(ctx); size_t workspace_size, reservespace_size; - Tensor workspace; + phi::DenseTensor workspace; PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNTempSizes( handle, rnn_desc, x_desc, &workspace_size, &reservespace_size)); workspace = ctx.AllocateTmpTensor( @@ -4967,7 +4967,7 @@ MLURNNDesc::~MLURNNDesc() { "MLU RNNForward failed. x_desc initializing failed.")); auto& dev_ctx = GetDevCtxFromCTX(ctx); size_t workspace_size; - Tensor workspace; + phi::DenseTensor workspace; PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNTempSizes( handle, rnn_desc, x_desc, &workspace_size, &reservespace_size)); workspace = ctx.AllocateTmpTensor( @@ -5028,7 +5028,7 @@ MLURNNDesc::~MLURNNDesc() { cnnlHandle_t handle = GetHandleFromCTX(ctx); auto& dev_ctx = GetDevCtxFromCTX(ctx); size_t workspace_size; - Tensor workspace; + phi::DenseTensor workspace; PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetMaskedWorkspaceSize(handle, masked_mode, input_desc, @@ -5075,7 +5075,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, weight_desc, pos_weight_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -5119,7 +5119,7 @@ MLURNNDesc::~MLURNNDesc() { handle, target_desc, weight_desc, pos_weight_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); @@ -5227,7 +5227,7 @@ MLURNNDesc::~MLURNNDesc() { handle, input_desc, grid_desc, output_desc, &workspace_size)); auto& dev_ctx = GetDevCtxFromCTX(ctx); - Tensor workspace = ctx.AllocateTmpTensor( + phi::DenseTensor workspace = ctx.AllocateTmpTensor( {static_cast(workspace_size)}, dev_ctx); void* workspace_ptr = workspace.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h index 8fbaade9dc01b..413158f441a7b 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.h +++ b/paddle/fluid/operators/mlu/mlu_baseop.h @@ -29,7 +29,6 @@ limitations under the License. 
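The long run of mlu_baseop.cc hunks above repeats one idiom: query the cnnl workspace size, park it in a throwaway phi::DenseTensor, and pass the raw pointer to the MLU kernel. A condensed sketch of the variant used where only a device context is at hand; the helper name is hypothetical, and the static_cast template argument elided in this excerpt is assumed to be int64_t:

#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

// Build an INT8-typed scratch buffer sized to a cnnl workspace query,
// mirroring the workspace temporaries in the hunks above.
phi::DenseTensor MakeCnnlWorkspace(size_t workspace_size,
                                   const phi::Place& place) {
  phi::DenseTensor workspace(phi::DataType::INT8);  // raw byte buffer
  workspace.Resize(phi::make_ddim({static_cast<int64_t>(workspace_size)}));
  workspace.mutable_data(place);  // allocates workspace_size bytes on `place`
  return workspace;
}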
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; using ExecutionContext = framework::ExecutionContext; using DeviceContextPool = platform::DeviceContextPool; @@ -377,18 +376,18 @@ class MLUOpTensorDesc { const mluOpDataType_t tensor_dtype, int position); - MLUOpTensorDesc(const Tensor& tensor, + MLUOpTensorDesc(const phi::DenseTensor& tensor, const mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype); - explicit MLUOpTensorDesc(const Tensor& tensor); + explicit MLUOpTensorDesc(const phi::DenseTensor& tensor); - MLUOpTensorDesc(const Tensor& tensor, + MLUOpTensorDesc(const phi::DenseTensor& tensor, mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype, int position); - MLUOpTensorDesc(const Tensor& tensor, + MLUOpTensorDesc(const phi::DenseTensor& tensor, mluOpTensorLayout_t layout, const mluOpDataType_t tensor_dtype, int position, @@ -458,11 +457,11 @@ class MLUCnnlRandomGeneratorDesc { public: MLUCnnlRandomGeneratorDesc(const ExecutionContext& ctx, const int seed); const cnnlRandGenerator_t get() const; - Tensor& get_state(); + phi::DenseTensor& get_state(); ~MLUCnnlRandomGeneratorDesc(); private: - Tensor mlu_state; + phi::DenseTensor mlu_state; cnnlRandGenerator_t mlu_generator = nullptr; }; diff --git a/paddle/fluid/operators/modified_huber_loss_op.cu b/paddle/fluid/operators/modified_huber_loss_op.cu index 330f4ca3596bd..bd4451ebda46d 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cu +++ b/paddle/fluid/operators/modified_huber_loss_op.cu @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - struct ModifiedHuberLossBackward { template HOSTDEVICE void operator()(Tuple t) const { diff --git a/paddle/fluid/operators/modified_huber_loss_op.h b/paddle/fluid/operators/modified_huber_loss_op.h index 50d5a14548e35..62600ed7c6970 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.h +++ b/paddle/fluid/operators/modified_huber_loss_op.h @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/multi_dot_op.cc b/paddle/fluid/operators/multi_dot_op.cc index b83bc8ea6541b..483c2bda72efa 100644 --- a/paddle/fluid/operators/multi_dot_op.cc +++ b/paddle/fluid/operators/multi_dot_op.cc @@ -27,7 +27,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class MultiDotOpMaker : public framework::OpProtoAndCheckerMaker { public: diff --git a/paddle/fluid/operators/multinomial_op_npu.cc b/paddle/fluid/operators/multinomial_op_npu.cc index 206c7b041a9b3..74f3578c6e8d4 100644 --- a/paddle/fluid/operators/multinomial_op_npu.cc +++ b/paddle/fluid/operators/multinomial_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class NPUMultinomialKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/multiplex_op.cc b/paddle/fluid/operators/multiplex_op.cc index 749849a333f3d..ba263427caa87 100644 --- a/paddle/fluid/operators/multiplex_op.cc +++ b/paddle/fluid/operators/multiplex_op.cc @@ -23,8 +23,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class MultiplexOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index a4b418b14cc84..4b9fe86b22565 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -31,7 +31,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using Sampler = math::Sampler; using DDim = framework::DDim; @@ -44,7 +43,7 @@ using EigenMatrix = framework::EigenMatrix; template void PrepareSamples(const framework::ExecutionContext &context, Sampler *sampler, - Tensor *sample_labels) { + phi::DenseTensor *sample_labels) { auto label = context.Input("Label"); const int64_t *label_data = label->data(); auto label_dims = label->dims(); @@ -154,9 +153,9 @@ class NCEKernel : public framework::OpKernel { std::vector sample_out_dims; auto label = context.Input("Label"); - Tensor *sample_labels; - Tensor *sample_out; - Tensor sample_labels_tmp, sample_out_tmp; + phi::DenseTensor *sample_labels; + phi::DenseTensor *sample_out; + phi::DenseTensor sample_labels_tmp, sample_out_tmp; if (is_test) { // set dims of output(SampleOut) int num_true_classes = label->dims().size() == 2 ? label->dims()[1] : 1; @@ -339,7 +338,7 @@ class NCEGradKernel : public framework::OpKernel { } // T b = 1. / num_total_classes * num_neg_samples; - Tensor sample_grad; // tmp tensor + phi::DenseTensor sample_grad; // tmp tensor T *sample_grad_data = sample_grad.mutable_data(sample_labels->dims(), context.GetPlace()); // backward cost diff --git a/paddle/fluid/operators/norm_op_npu.cc b/paddle/fluid/operators/norm_op_npu.cc index c5f0749227e23..619f902513459 100644 --- a/paddle/fluid/operators/norm_op_npu.cc +++ b/paddle/fluid/operators/norm_op_npu.cc @@ -16,7 +16,6 @@ namespace paddle { namespace operators { using DDim = framework::DDim; -using Tensor = phi::DenseTensor; void CheckAxis(int axis, int rank) { // check the axis is in [-rank, rank-1] diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index b6e27e6b54151..2412913995b95 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -37,7 +37,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; // math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, @@ -433,21 +432,21 @@ __global__ void DoubleGradComputeDDYWithGlobal(const T *ddx, template void NormDoubleGradFunctor(const DeviceContext &ctx, const DataLayout data_layout, - const Tensor *X, - const Tensor *Scale, - const Tensor *dY, - const Tensor *Saved_mean, - const Tensor *Saved_variance, - const Tensor *Mean, - const Tensor *Variance, + const phi::DenseTensor *X, + const phi::DenseTensor *Scale, + const phi::DenseTensor *dY, + const phi::DenseTensor *Saved_mean, + const phi::DenseTensor *Saved_variance, + const phi::DenseTensor *Mean, + const phi::DenseTensor *Variance, const double epsilon, const bool use_global_stats, - const Tensor *ddX, - const Tensor *ddScale, - const Tensor *ddBias, - Tensor *dX, - Tensor *dScale, - Tensor *ddY) { + const phi::DenseTensor *ddX, + const phi::DenseTensor *ddScale, + const phi::DenseTensor *ddBias, + phi::DenseTensor *dX, + phi::DenseTensor *dScale, + phi::DenseTensor *ddY) { const T 
*x_data = X->data(); const T *dy_data = dY->data(); const T *ddx_data = (ddX == nullptr ? nullptr : ddX->data()); @@ -463,7 +462,7 @@ void NormDoubleGradFunctor(const DeviceContext &ctx, const int N = x_dims[0]; const int num = X->numel(); const int sample_size = num / N / C; - Tensor scale_tmp; + phi::DenseTensor scale_tmp; if (!Scale) { scale_tmp.mutable_data({C}, ctx.GetPlace()); set_constant(ctx, &scale_tmp, static_cast(1)); diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index 99623917d59ee..fdab03698711c 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -37,8 +37,6 @@ static inline int GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } -using Tensor = phi::DenseTensor; - template __global__ void initialize_zero_kernel(T* data, const int length) { CUDA_KERNEL_LOOP(idx, length) { data[idx] = static_cast(0); } diff --git a/paddle/fluid/operators/one_hot_op.h b/paddle/fluid/operators/one_hot_op.h index d878fd5a6d44b..41ec3eb9a135f 100644 --- a/paddle/fluid/operators/one_hot_op.h +++ b/paddle/fluid/operators/one_hot_op.h @@ -76,7 +76,6 @@ struct OneHotOpFunctor { } }; -using Tensor = phi::DenseTensor; template class OneHotKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index e2997dc079c61..35e8bcde9daad 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class OneHotNPUKernel : public framework::OpKernel { @@ -54,7 +53,7 @@ class OneHotNPUKernel : public framework::OpKernel { .AddOutput(*out); runner.Run(dev_ctx.stream()); } else { - Tensor transformed_in; + phi::DenseTensor transformed_in; transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); const auto& cast_runner = NpuOpRunner( "Cast", {*in}, {transformed_in}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/one_hot_op_xpu.cc b/paddle/fluid/operators/one_hot_op_xpu.cc index 66826cd4ff33a..e4f8555fceae2 100644 --- a/paddle/fluid/operators/one_hot_op_xpu.cc +++ b/paddle/fluid/operators/one_hot_op_xpu.cc @@ -22,8 +22,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class OneHotXPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/one_hot_v2_op_mlu.cc b/paddle/fluid/operators/one_hot_v2_op_mlu.cc index f98cbabf58a87..0b2fbfe85d403 100644 --- a/paddle/fluid/operators/one_hot_v2_op_mlu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_mlu.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class OneHotV2MLUKernel : public framework::OpKernel { @@ -44,10 +43,12 @@ class OneHotV2MLUKernel : public framework::OpKernel { float on_value = 1.0f, off_value = 0.0f; const int in_off_dim[1] = {1}; - Tensor on_value_tensor = ctx.AllocateTmpTensor( - framework::DDim(in_off_dim, 1), dev_ctx); - Tensor off_value_tensor = ctx.AllocateTmpTensor( - framework::DDim(in_off_dim, 1), dev_ctx); + phi::DenseTensor on_value_tensor = + ctx.AllocateTmpTensor( + framework::DDim(in_off_dim, 1), dev_ctx); + phi::DenseTensor off_value_tensor = + ctx.AllocateTmpTensor( + framework::DDim(in_off_dim, 1), dev_ctx); FillMLUTensorWithHostValue(ctx, on_value, &on_value_tensor); FillMLUTensorWithHostValue(ctx, off_value, &off_value_tensor); @@ -64,7 +65,7 @@ class OneHotV2MLUKernel : public framework::OpKernel { ToCnnlDataType(out->dtype()), GetBasePtr(out)); } else { - Tensor transformed_in; + phi::DenseTensor transformed_in; transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); // use cnnlCast to cast int64_t to int32_t then do one_hot MLUCnnlTensorDesc in_desc(*in); diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index 8cc97b417ca78..d305a04ea0782 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class OneHotV2NPUKernel : public framework::OpKernel { @@ -53,7 +52,7 @@ class OneHotV2NPUKernel : public framework::OpKernel { .AddOutput(*out); runner.Run(dev_ctx.stream()); } else { - Tensor transformed_in; + phi::DenseTensor transformed_in; transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); const auto& cast_runner = NpuOpRunner( "Cast", {*in}, {transformed_in}, {{"dst_type", ACL_INT32}}); diff --git a/paddle/fluid/operators/optimizers/adadelta_op.cc b/paddle/fluid/operators/optimizers/adadelta_op.cc index 4390da3c4e479..262aa0fc350e2 100644 --- a/paddle/fluid/operators/optimizers/adadelta_op.cc +++ b/paddle/fluid/operators/optimizers/adadelta_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class AdadeltaOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc index 4f800233c24fe..54643a39bcd4c 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/adagrad_op.cc @@ -25,7 +25,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class AdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index aa331df4cbd0c..cf447bc593103 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -19,8 +19,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class AdamOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/adam_op_mlu.cc b/paddle/fluid/operators/optimizers/adam_op_mlu.cc index c9c33643d1ee5..d998cff14126c 100644 --- a/paddle/fluid/operators/optimizers/adam_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class AdamMLUKernel : public framework::OpKernel { public: @@ -156,9 +154,9 @@ class AdamMLUKernel : public framework::OpKernel { const phi::DenseTensor* beta2_tensor = nullptr; const phi::DenseTensor* epsilon_tensor = nullptr; - Tensor beta1_tmp(experimental::DataType::FLOAT32); - Tensor beta2_tmp(experimental::DataType::FLOAT32); - Tensor epsilon_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta1_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta2_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor epsilon_tmp(experimental::DataType::FLOAT32); if (ctx.HasInput("Beta1Tensor")) { beta1_tensor = ctx.Input("Beta1Tensor"); @@ -462,9 +460,9 @@ class MergedAdamMLUKernel : public framework::OpKernel { const phi::DenseTensor* beta2_tensor = nullptr; const phi::DenseTensor* epsilon_tensor = nullptr; - Tensor beta1_tmp(experimental::DataType::FLOAT32); - Tensor beta2_tmp(experimental::DataType::FLOAT32); - Tensor epsilon_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta1_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta2_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor epsilon_tmp(experimental::DataType::FLOAT32); T beta1 = static_cast(ctx.Attr("beta1")); T beta2 = static_cast(ctx.Attr("beta2")); diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index f94b32413a04a..356bef435e45c 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class AdamNPUKernel : public framework::OpKernel { public: @@ -132,9 +130,9 @@ class AdamNPUKernel : public framework::OpKernel { const phi::DenseTensor* beta2_tensor = nullptr; const phi::DenseTensor* epsilon_tensor = nullptr; - Tensor beta1_tmp(experimental::DataType::FLOAT32); - Tensor beta2_tmp(experimental::DataType::FLOAT32); - Tensor epsilon_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta1_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor beta2_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor epsilon_tmp(experimental::DataType::FLOAT32); if (ctx.HasInput("Beta1Tensor")) { beta1_tensor = ctx.Input("Beta1Tensor"); @@ -286,9 +284,9 @@ class AdamWNPUKernel : public AdamNPUKernel { ctx.template device_context() .stream(); - Tensor one(experimental::DataType::FLOAT32); - Tensor decay(experimental::DataType::FLOAT32); - Tensor tmp(experimental::DataType::FLOAT32); + phi::DenseTensor one(experimental::DataType::FLOAT32); + phi::DenseTensor decay(experimental::DataType::FLOAT32); + phi::DenseTensor tmp(experimental::DataType::FLOAT32); tmp.mutable_data({1}, place); one.mutable_data({1}, place); diff --git a/paddle/fluid/operators/optimizers/adamax_op.cc b/paddle/fluid/operators/optimizers/adamax_op.cc index 5298030f17a04..12429933e03d3 100644 --- a/paddle/fluid/operators/optimizers/adamax_op.cc +++ b/paddle/fluid/operators/optimizers/adamax_op.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class AdamaxOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 5ab3ef3b2e61c..6c73439c62551 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class DecayedAdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index e866a97f1ddcc..f5710f2e7d8eb 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class DpsgdOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cc b/paddle/fluid/operators/optimizers/ftrl_op.cc index b81a6c5ab6bb7..22be1f5ac685a 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.cc +++ b/paddle/fluid/operators/optimizers/ftrl_op.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class FTRLOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h index 97b1a09766b68..99e210ce51e96 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.h +++ b/paddle/fluid/operators/optimizers/ftrl_op.h @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/optimizers/merged_adam_op.cc b/paddle/fluid/operators/optimizers/merged_adam_op.cc index 8e4ff40372a12..867cfe0268c51 100644 --- a/paddle/fluid/operators/optimizers/merged_adam_op.cc +++ b/paddle/fluid/operators/optimizers/merged_adam_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class MergedAdamOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc index c390a12863bc4..ea74dba1c54d1 100644 --- a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc @@ -134,7 +134,8 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - Tensor mu_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); + phi::DenseTensor mu_tensor = + ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc mu_tensor_desc(mu_tensor); MLUCnnl::Fill(ctx, CNNL_POINTER_MODE_HOST, @@ -158,7 +159,7 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { auto velocity_out = velocitys_out[idx]; auto grad = grads[idx]; - Tensor regularized_grad; + phi::DenseTensor regularized_grad; MLUCnnlTensorDesc param_desc(*param_out); if (regularization_flag == phi::RegularizationType::kL2DECAY) { regularized_grad = ctx.AllocateTmpTensor( diff --git a/paddle/fluid/operators/optimizers/momentum_op.cc b/paddle/fluid/operators/optimizers/momentum_op.cc index 4171f0c11955a..538028139b8c4 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.cc +++ b/paddle/fluid/operators/optimizers/momentum_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class MomentumOpInferVarType : public framework::VarTypeInference { public: void operator()(framework::InferVarTypeContext* ctx) const override { @@ -38,24 +36,24 @@ class MomentumOpInferVarType : public framework::VarTypeInference { void MomentumOpMaker::Make() { AddInput("Param", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input parameter that has to be updated"); AddInput("Grad", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input gradient of the parameter"); AddInput("Velocity", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input velocity (corresponding to the parameter) " "that has to be updated"); AddInput("LearningRate", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input learning rate"); AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable(); AddOutput("ParamOut", - "(Tensor) This output is updated parameter. 
" + "(phi::DenseTensor) This output is updated parameter. " "It shared memory with Input(Param)."); AddOutput("VelocityOut", - "(Tensor) This output is updated velocity. " + "(phi::DenseTensor) This output is updated velocity. " "It shared memory with Input(Velocity)."); AddOutput("MasterParamOut", "The updated FP32 master weight for AMP. " diff --git a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc index 4bebb4264cc29..b37e7aa99f793 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc @@ -50,7 +50,7 @@ class MLUMomentumOpKernel : public framework::OpKernel { auto* grad_var = ctx.InputVar("Grad"); if (grad_var->IsType()) { auto grad = ctx.Input("Grad"); - Tensor mu_tensor = + phi::DenseTensor mu_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc mu_tensor_desc(mu_tensor); MLUCnnl::Fill(ctx, @@ -59,7 +59,7 @@ class MLUMomentumOpKernel : public framework::OpKernel { mu_tensor_desc.get(), GetBasePtr(&mu_tensor)); - Tensor regularized_grad; + phi::DenseTensor regularized_grad; MLUCnnlTensorDesc param_desc(*param); if (regularization_flag == phi::RegularizationType::kL2DECAY) { regularized_grad = diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc index 2da5bed7642c1..598b84415f9ec 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class ProximalAdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h index 136e416307ab0..72eccd17e4489 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ProximalAdagradOpKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index 061e495c4bacd..21b145ee49d7c 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class ProximalGDOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.h b/paddle/fluid/operators/optimizers/proximal_gd_op.h index 024062045ae43..49cf7b68bd32a 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.h +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.h @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ProximalGDOpKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc index 579bc76be5f47..abbe7ddcc5b61 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc @@ -15,8 +15,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class RMSPROPNPUKernel : public framework::OpKernel { public: @@ -46,18 +44,18 @@ class RMSPROPNPUKernel : public framework::OpKernel { auto *grad_tensor = ctx.Input("Grad"); if (centered) { framework::NPUAttributeMap attr_input = {{"use_locking", false}}; - const Tensor *rho_tensor = nullptr; - const Tensor *momentum_tensor = nullptr; - const Tensor *epsilon_tensor = nullptr; - Tensor rho_tmp(experimental::DataType::FLOAT32); + const phi::DenseTensor *rho_tensor = nullptr; + const phi::DenseTensor *momentum_tensor = nullptr; + const phi::DenseTensor *epsilon_tensor = nullptr; + phi::DenseTensor rho_tmp(experimental::DataType::FLOAT32); rho_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&rho_tmp, rho); rho_tensor = &rho_tmp; - Tensor momentum_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor momentum_tmp(experimental::DataType::FLOAT32); momentum_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&momentum_tmp, momentum); momentum_tensor = &momentum_tmp; - Tensor epsilon_tmp(experimental::DataType::FLOAT32); + phi::DenseTensor epsilon_tmp(experimental::DataType::FLOAT32); epsilon_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&epsilon_tmp, epsilon); epsilon_tensor = &epsilon_tmp; diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index 3e072a5e17a64..f59171e3ae7c4 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -19,8 +19,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SparseMomentumOpInferVarType : public framework::VarTypeInference { public: void operator()(framework::InferVarTypeContext* ctx) const override { @@ -36,30 +34,31 @@ class SparseMomentumOpInferVarType : public framework::VarTypeInference { void SparseMomentumOpMaker::Make() { AddInput("Param", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input parameter that has to be updated"); AddInput("Grad", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input gradient of the parameter"); AddInput("Velocity", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input velocity (corresponding to the parameter) " "that has to be updated"); AddInput("Index", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input index of Param to do update operation"); AddInput("Axis", - "The Tensor which contains the axis that we do update operation.") + "The phi::DenseTensor which contains the axis that we do update " + "operation.") .AsDispensable(); AddInput("LearningRate", - "(Tensor, default Tensor) " + "(phi::DenseTensor, default phi::DenseTensor) " "Input learning rate"); AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable(); AddOutput("ParamOut", - "(Tensor) This output is updated parameter. 
" + "(phi::DenseTensor) This output is updated parameter. " "It shared memory with Input(Param)."); AddOutput("VelocityOut", - "(Tensor) This output is updated velocity. " + "(phi::DenseTensor) This output is updated velocity. " "It shared memory with Input(Velocity)."); AddOutput("MasterParamOut", "The updated FP32 master weight for AMP. " diff --git a/paddle/fluid/operators/p_norm_op_npu.cc b/paddle/fluid/operators/p_norm_op_npu.cc index 9d312dd572a45..5c86cf188c021 100644 --- a/paddle/fluid/operators/p_norm_op_npu.cc +++ b/paddle/fluid/operators/p_norm_op_npu.cc @@ -62,7 +62,7 @@ class PnormNPUKernel : public framework::OpKernel { {"keep_dims", keepdim}}); runner.Run(stream); } else { - Tensor tmp_x; + phi::DenseTensor tmp_x; tmp_x.mutable_data(xdim, ctx.GetPlace()); const auto& power_runner1 = @@ -93,7 +93,6 @@ template class PnormGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - using Tensor = phi::DenseTensor; auto* x = ctx.Input("X"); auto* y = ctx.Input("Out"); auto* dy = ctx.Input(framework::GradVarName("Out")); @@ -113,8 +112,8 @@ class PnormGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - Tensor y_share(y->type()); - Tensor dy_share(dy->type()); + phi::DenseTensor y_share(y->type()); + phi::DenseTensor dy_share(dy->type()); y_share.ShareDataWith(*y); dy_share.ShareDataWith(*dy); auto ydim = xdim; @@ -130,22 +129,22 @@ class PnormGradNPUKernel : public framework::OpKernel { FillNpuTensorWithConstant(dx, static_cast(0)); dx->Resize(xdim); } else if (porder == INFINITY || porder == -INFINITY) { - Tensor x_abs; + phi::DenseTensor x_abs; x_abs.mutable_data(xdim, place); const auto& r_abs = NpuOpRunner("Abs", {*x}, {x_abs}, {}); r_abs.Run(stream); - Tensor t_cond; + phi::DenseTensor t_cond; t_cond.mutable_data(xdim, place); const auto& r_equal = NpuOpRunner("Equal", {x_abs, y_share}, {t_cond}, {}); r_equal.Run(stream); - Tensor t_zero; + phi::DenseTensor t_zero; t_zero.mutable_data({1}, place); FillNpuTensorWithConstant(&t_zero, static_cast(0)); - Tensor x_sign; + phi::DenseTensor x_sign; x_sign.mutable_data(xdim, place); const auto& r_sign = NpuOpRunner("Sign", {*x}, {x_sign}, {}); r_sign.Run(stream); @@ -157,17 +156,17 @@ class PnormGradNPUKernel : public framework::OpKernel { NpuOpRunner("SelectV2", {t_cond, *dx, t_zero}, {*dx}, {}); r_sel.Run(stream); } else { - Tensor x_abs; + phi::DenseTensor x_abs; x_abs.mutable_data(xdim, place); const auto& r_abs = NpuOpRunner("Abs", {*x}, {x_abs}, {}); r_abs.Run(stream); - Tensor x_sign; + phi::DenseTensor x_sign; x_sign.mutable_data(xdim, place); const auto& r_sign = NpuOpRunner("Sign", {*x}, {x_sign}, {}); r_sign.Run(stream); - Tensor y_pow; + phi::DenseTensor y_pow; y_pow.mutable_data(ydim, place); if (porder >= 1) { const auto& r_pow1 = NpuOpRunner( diff --git a/paddle/fluid/operators/pad3d_op_npu.cc b/paddle/fluid/operators/pad3d_op_npu.cc index 7694e0edbf9f9..497dc51e39f0d 100644 --- a/paddle/fluid/operators/pad3d_op_npu.cc +++ b/paddle/fluid/operators/pad3d_op_npu.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static inline std::vector GetPaddings( const framework::ExecutionContext& context) { std::vector paddings(6); diff --git a/paddle/fluid/operators/pad_op_npu.cc b/paddle/fluid/operators/pad_op_npu.cc index 425defc9792c7..27443b8b425d7 100644 --- a/paddle/fluid/operators/pad_op_npu.cc +++ b/paddle/fluid/operators/pad_op_npu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PadNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index e5a066fdc6539..01095b6d429b4 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class PartialConcatOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index ef52bbad525a4..f4acf68dcbc70 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -23,8 +23,6 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) -using Tensor = phi::DenseTensor; - template __global__ void ConcatPartialCUDAKernel(T **in, T *out, @@ -72,7 +70,7 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto in_vars = ctx.MultiInput("X"); - Tensor *out = ctx.Output("Out"); + phi::DenseTensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(in_vars[0] != nullptr, true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index c0c6b2f607526..050752f23888b 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index aa2f30aaafc2c..6473f8d603789 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class PartialSumOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index c92e9618bfce0..093e0032b3cb9 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -23,8 +23,6 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) -using Tensor = phi::DenseTensor; - template __global__ void SumArrayPartialCUDAKernel(T **in, T *out, @@ -77,7 +75,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto in_vars = ctx.MultiInput("X"); - Tensor *out = ctx.Output("Out"); + phi::DenseTensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ( in_vars[0] != nullptr, @@ -150,7 +148,7 @@ template class PartialSumGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const Tensor *out_grad = + const phi::DenseTensor *out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index 26f5039e6363f..fa4cc19d5e2c3 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PartialSumKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 25d2ac8ce0d7a..c160dc28bfda4 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -109,12 +109,12 @@ framework::OpKernelType PoolOpGrad::GetKernelTypeForVar( void Pool2dOpMaker::Make() { AddInput( "X", - "(Tensor) The input tensor of pooling operator. " + "(phi::DenseTensor) The input tensor of pooling operator. " "The format of input tensor is NCHW, where N is batch size, C is the " "number of channels, H is the height of the feature, " "and W is the width of the feature."); AddOutput("Out", - "(Tensor) The output tensor of pooling operator. " + "(phi::DenseTensor) The output tensor of pooling operator. " "The format of output tensor is also NCHW, " "where N is batch size, C is the number of channels, " "H is the height of the feature, " @@ -301,14 +301,14 @@ class PoolOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { void Pool3dOpMaker::Make() { AddInput("X", - "(Tensor) The input tensor of pooling operator. " + "(phi::DenseTensor) The input tensor of pooling operator. " "The format of input tensor is NCDHW or NDHWC, where N is batch " "size, C is " "the number of channels, and D, H and W is the depth, height and " "width of " "the feature, respectively."); AddOutput("Out", - "(Tensor) The output tensor of pooling operator." + "(phi::DenseTensor) The output tensor of pooling operator." "The format of output tensor is also NCDHW or NDHWC, " "where N is batch size, C is " "the number of channels, and D, H and W is the depth, height and " diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index c08b589cbe12e..fd2c0ce15b461 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -22,8 +22,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class PoolOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/pool_op_mlu.cc b/paddle/fluid/operators/pool_op_mlu.cc index e2af30faf36f4..6e422a645fffb 100644 --- a/paddle/fluid/operators/pool_op_mlu.cc +++ b/paddle/fluid/operators/pool_op_mlu.cc @@ -46,8 +46,8 @@ class MLUPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - Tensor *out = ctx.Output("Out"); + const phi::DenseTensor *in_x = ctx.Input("X"); + phi::DenseTensor *out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); @@ -212,11 +212,11 @@ class MLUPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - const Tensor *out = ctx.Input("Out"); - const Tensor *out_grad = + const phi::DenseTensor *in_x = ctx.Input("X"); + const phi::DenseTensor *out = ctx.Input("Out"); + const phi::DenseTensor *out_grad = ctx.Input(framework::GradVarName("Out")); - Tensor *in_x_grad = + phi::DenseTensor *in_x_grad = ctx.Output(framework::GradVarName("X")); in_x_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index 1cc89cda21bc7..745b793f51147 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class PositiveNegativePairKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index 51aa3dcd39a35..8a2199e0231bf 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class PReluOp : public framework::OperatorWithKernel { public: PReluOp(const std::string &type, diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc index 9b3146c3b8487..ca291187b9cdd 100644 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ b/paddle/fluid/operators/prroi_pool_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class PRROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index b24ded79dd050..d1aa1d37d0479 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -17,8 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaximumNumBlocks = 4096; diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 5eead81365053..a24b234a05da7 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -28,7 +28,6 @@ extern "C" { namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoD = framework::LoD; class PyramidHashOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/random_routing_op.cu b/paddle/fluid/operators/random_routing_op.cu index f7f111299c73d..afe45894d5c1a 100644 --- a/paddle/fluid/operators/random_routing_op.cu +++ b/paddle/fluid/operators/random_routing_op.cu @@ -29,8 +29,6 @@ static inline int GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } -using Tensor = phi::DenseTensor; - template __global__ void random_routing_kernel(int64_t* data, const int64_t length, diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc index 4c740c5985ade..80bd022aff340 100644 --- a/paddle/fluid/operators/rank_attention_op.cc +++ b/paddle/fluid/operators/rank_attention_op.cc @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class RankAttentionOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc index ce06d1b1089a5..41de1f6b1300a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ReduceAnyNPUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc index d623bf23534a0..aec1640181bcc 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc @@ -33,8 +33,6 @@ limitations under the License. 
*/ namespace f = paddle::framework; namespace p = paddle::platform; -using Tensor = phi::DenseTensor; - USE_OP_ITSELF(reduce_any); USE_OP_DEVICE_KERNEL(reduce_any, NPU); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc index a23931c0aa246..ca19b9e6e52da 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc @@ -96,9 +96,10 @@ template class ReduceMaxGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Input("Out"); - auto* out_grad = context.Input(framework::GradVarName("Out")); + auto* x = context.Input("X"); + auto* out = context.Input("Out"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); auto reduce_dims = context.Attr>("dim"); bool reduce_all = context.Attr("reduce_all"); int in_dtype = context.Attr("in_dtype"); @@ -108,7 +109,8 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "MLU only support in_dtype == -1 in reduce_max_grad op.")); - auto* x_grad = context.Output(framework::GradVarName("X")); + auto* x_grad = + context.Output(framework::GradVarName("X")); x_grad->mutable_data(context.GetPlace()); auto place = context.GetPlace(); @@ -122,7 +124,7 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { } } - Tensor tmp_out, tmp_out_grad; + phi::DenseTensor tmp_out, tmp_out_grad; auto tmp_out_dims_vec = x_dims_vec; for (auto d : reduce_dims) { if (d < 0) { @@ -136,7 +138,7 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { tmp_out_grad.ShareDataWith(*out_grad); tmp_out_grad.Resize(phi::make_ddim(tmp_out_dims_vec)); - Tensor transformed_out(x->type()); + phi::DenseTensor transformed_out(x->type()); transformed_out.Resize(phi::make_ddim(x_dims_vec)); transformed_out.mutable_data(place); @@ -149,7 +151,7 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { transformed_out_desc.get(), GetBasePtr(&transformed_out)); - Tensor transformed_out_grad(x->type()); + phi::DenseTensor transformed_out_grad(x->type()); transformed_out_grad.Resize(phi::make_ddim(x_dims_vec)); transformed_out_grad.mutable_data(place); MLUCnnlTensorDesc tmp_out_grad_desc(tmp_out_grad); @@ -162,7 +164,7 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { GetBasePtr(&transformed_out_grad)); // compare - Tensor equal_cond; + phi::DenseTensor equal_cond; equal_cond.mutable_data(x_grad->dims(), place); MLUCnnlTensorDesc x_desc(*x); @@ -178,7 +180,7 @@ class ReduceMaxGradMLUKernel : public framework::OpKernel { GetBasePtr(&equal_cond)); // select - Tensor t_zero; + phi::DenseTensor t_zero; t_zero.mutable_data(x_grad->dims(), place); FillMLUTensorWithHostValue(context, static_cast(0), &t_zero); t_zero.Resize(x_grad->dims()); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc index 172786963e4c9..1ade0c6746918 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ReduceMaxNPUKernel : public framework::OpKernel { public: @@ -77,8 +76,8 @@ class ReduceMaxNPUKernel : public framework::OpKernel { ctx.template device_context(); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::INT64) { - auto op_func = [](const std::vector& inputs, - const std::vector& outputs, + auto op_func = [](const std::vector& inputs, + const std::vector& outputs, const NPUAttributeMap& attrs, const platform::NPUDeviceContext& dev_ctx) { const auto& runner = @@ -147,7 +146,7 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel { } } - Tensor tmp_out, tmp_out_grad; + phi::DenseTensor tmp_out, tmp_out_grad; auto tmp_out_dims_vec = x_dims_vec; for (auto d : reduce_dims) { if (d < 0) { @@ -161,7 +160,7 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel { tmp_out_grad.ShareDataWith(*out_grad); tmp_out_grad.Resize(phi::make_ddim(tmp_out_dims_vec)); - Tensor transformed_out(x->type()); + phi::DenseTensor transformed_out(x->type()); transformed_out.Resize(phi::make_ddim(x_dims_vec)); transformed_out.mutable_data(place); NpuOpRunner r_brd_out; @@ -170,7 +169,7 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel { .AddInput(std::move(x_dims_vec)) .AddOutput(transformed_out) .Run(stream); - Tensor transformed_out_grad(x->type()); + phi::DenseTensor transformed_out_grad(x->type()); transformed_out_grad.Resize(phi::make_ddim(x_dims_vec)); transformed_out_grad.mutable_data(place); NpuOpRunner r_brd_out_grad; @@ -181,14 +180,14 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel { .Run(stream); // compare - Tensor equal_cond; + phi::DenseTensor equal_cond; equal_cond.mutable_data(x_grad->dims(), place); const auto& r_equal = NpuOpRunner("Equal", {*x, transformed_out}, {equal_cond}, {}); r_equal.Run(stream); // select - Tensor t_zero; + phi::DenseTensor t_zero; t_zero.mutable_data(x_grad->dims(), place); FillNpuTensorWithConstant(&t_zero, static_cast(0)); t_zero.Resize(x_grad->dims()); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc index b73bde6275347..d1658c24733c9 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc @@ -54,7 +54,7 @@ class ReduceMeanGradMLUKernel : public framework::OpKernel { reduce_numel *= input_dims[d]; } - Tensor tmp_output_grad(output_grad->dtype()); + phi::DenseTensor tmp_output_grad(output_grad->dtype()); auto tmp_output_dims = input_dims; for (auto d : reduce_dims) { tmp_output_dims[d] = 1; diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc index feca58ce19861..35273df44d1e2 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc @@ -81,7 +81,7 @@ class NPUReduceMeanGradOpKernel : public framework::OpKernel { reduce_numel *= input_dims[d]; } - Tensor tensor_value(input_grad->dtype()); + phi::DenseTensor tensor_value(input_grad->dtype()); tensor_value.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant( &tensor_value, static_cast(1.0f / static_cast(reduce_numel))); @@ -96,8 +96,8 @@ class NPUReduceMeanGradOpKernel : public framework::OpKernel { .AddOutput(*input_grad) .Run(stream); - Tensor transformed_input_grad, transformed_out_grad; - Tensor tmp_output_grad; + phi::DenseTensor transformed_input_grad, 
transformed_out_grad; + phi::DenseTensor tmp_output_grad; auto tmp_output_dims = input_dims; for (auto d : reduce_dims) { tmp_output_dims[d] = 1; diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc index 19efb2e6bfb4c..e7401d7917763 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ReduceMinNPUKernel : public framework::OpKernel { public: @@ -76,8 +75,8 @@ class ReduceMinNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); if (x->dtype() == experimental::DataType::INT64) { - auto op_func = [](const std::vector& inputs, - const std::vector& outputs, + auto op_func = [](const std::vector& inputs, + const std::vector& outputs, const NPUAttributeMap& attrs, const platform::NPUDeviceContext& dev_ctx) { const auto& runner = diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 027a787cbf50b..0cc7bf2898f86 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -48,7 +48,6 @@ namespace operators { keep_dim); \ } -using Tensor = phi::DenseTensor; using DDim = framework::DDim; inline void GetShuffledDim(const DDim& src_dims, @@ -137,7 +136,7 @@ void HandleLargeDim(const framework::ExecutionContext& context, const std::vector& dims, bool keep_dim) { // shuffle the reduced dim to the end - Tensor shuffled_input; + phi::DenseTensor shuffled_input; GetShuffledInput(context, input, &shuffled_input, dims); // transpose to 2D tensor whose shape is {unreduced, reduced}. 
@@ -168,7 +167,7 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context, DDim out_dim(out->dims()); DDim x_dim(x->dims()); // transpose and reshape X - Tensor shuffled_x; + phi::DenseTensor shuffled_x; GetShuffledInput(context, x, &shuffled_x, dims); DDim shuffled_dim = shuffled_x.dims(); shuffled_x.Resize({unreduced, reduced}); @@ -185,7 +184,7 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context, // transpose dX std::vector origin_axis(x_dim.size()); GetOriginDimFromShuffled(x_dim, dims, &origin_axis); - Tensor dx_tmp; + phi::DenseTensor dx_tmp; framework::TensorCopy(*dx, context.GetPlace(), &dx_tmp); dx_tmp.Resize(shuffled_dim); dx->Resize(x_dim); @@ -453,7 +452,7 @@ class ReduceGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { int in_dtype = context.Attr("in_dtype"); if (in_dtype >= 0) { - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); auto in_kernel_type = framework::OpKernelType( diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_function.h b/paddle/fluid/operators/reduce_ops/reduce_op_function.h index 39a0dc044f272..3176e489f89b3 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_function.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_function.h @@ -21,7 +21,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; template class ReduceProdNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h index 69c8935dafd6b..7b1b6bc831f0e 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h @@ -80,7 +80,7 @@ class ReduceSumGradKernel : public framework::OpKernel { int in_dtype = context.Attr("out_dtype"); if (in_dtype >= 0) { - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; auto* pre_input = context.Input(framework::GradVarName("Out")); auto in_kernel_type = framework::OpKernelType( diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc index 4ecf6e907b4cb..130c617f873ba 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc @@ -52,7 +52,7 @@ class ReduceSumGradMLUKernel : public framework::OpKernel { } } - Tensor tmp_out(out_grad->dtype()); + phi::DenseTensor tmp_out(out_grad->dtype()); auto tmp_output_dims = in_dims; for (auto d : reduce_dims) { tmp_output_dims[d] = 1; diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc index 6ba8a9c1373a1..9588aa54f3877 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc @@ -129,7 +129,7 @@ class ReduceSumGradNPUKernel : public framework::OpKernel { out_dims = UnsqueezeKernel::GetOutputShape( dims, out_grad->dims()); - Tensor out_grad_tmp(out_grad->type()); + phi::DenseTensor out_grad_tmp(out_grad->type()); out_grad_tmp.Resize(out_dims); out_grad_tmp.mutable_data(ctx.GetPlace()); framework::TensorCopy( diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 161f230bacbe4..42e6929508bff 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -41,8 +41,6 @@ class OpBase; namespace paddle { 
namespace operators { -using Tensor = phi::DenseTensor; - class ReshapeOp : public framework::OperatorWithKernel { public: ReshapeOp(const std::string &type, @@ -272,7 +270,7 @@ class ReshapeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor") { return expected_kernel_type; @@ -638,7 +636,7 @@ class Reshape2GradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor") { return expected_kernel_type; @@ -666,7 +664,7 @@ class Reshape2DoubleGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/rnn_op_mlu.cc b/paddle/fluid/operators/rnn_op_mlu.cc index cf4e255668232..1773c526b4635 100644 --- a/paddle/fluid/operators/rnn_op_mlu.cc +++ b/paddle/fluid/operators/rnn_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; using TensorList = std::vector; template @@ -459,7 +458,7 @@ class RNNMLUGradKernel : public framework::OpKernel { input_grad->mutable_data(input->dims(), ctx.GetPlace()); FillMLUTensorWithHostValue(ctx, static_cast(0.0), input_grad); - Tensor a, b; + phi::DenseTensor a, b; phi::DenseTensor* dynamic_grad_pre_h = &a; phi::DenseTensor* dynamic_grad_pre_c = &b; if (init_h_grad) { diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 6a7999c56557f..4407fbf1a8c96 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class ROIAlignOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/roi_align_op_mlu.cc b/paddle/fluid/operators/roi_align_op_mlu.cc index 5bde4dd7b6686..de0a8be93452d 100644 --- a/paddle/fluid/operators/roi_align_op_mlu.cc +++ b/paddle/fluid/operators/roi_align_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ROIAlignOpMLUKernel : public framework::OpKernel { public: @@ -76,7 +74,7 @@ class ROIAlignOpMLUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(lod.empty(), false, platform::errors::InvalidArgument( - "Input(ROIs) Tensor of ROIAlignOp " + "Input(ROIs) phi::DenseTensor of ROIAlignOp " "does not contain LoD information.")); auto rois_lod = lod.back(); rois_batch_size = rois_lod.size() - 1; @@ -110,7 +108,7 @@ class ROIAlignOpMLUKernel : public framework::OpKernel { } // only support float32 for now - Tensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); rois_cpu.Resize({rois_num, 4}); rois_cpu.mutable_data(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); @@ -119,8 +117,8 @@ class ROIAlignOpMLUKernel : public framework::OpKernel { T* rois_cpu_ptr = rois_cpu.mutable_data(platform::CPUPlace()); // boxes; [batch_idx, x1, y1, x2, y2] - Tensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); - Tensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); boxes_cpu.Resize({rois_num, 5}); boxes_mlu.Resize({rois_num, 5}); T* boxes_cpu_ptr = boxes_cpu.mutable_data(platform::CPUPlace()); @@ -139,8 +137,8 @@ class ROIAlignOpMLUKernel : public framework::OpKernel { const std::vector perm_to_nhwc = {0, 2, 3, 1}; const std::vector perm_to_nchw = {0, 3, 1, 2}; - Tensor input_nhwc(in->type()); - Tensor output_nhwc(out->type()); + phi::DenseTensor input_nhwc(in->type()); + phi::DenseTensor output_nhwc(out->type()); TransposeFromMLUTensor( ctx, perm_to_nhwc, in, &input_nhwc, true /*need_reshape_or_alloc*/); auto output_dims = out->dims(); @@ -221,7 +219,7 @@ class ROIAlignGradOpMLUKernel : public framework::OpKernel { } } - Tensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor rois_cpu(framework::TransToPhiDataType(VT::FP32)); rois_cpu.Resize({rois_num, 4}); rois_cpu.mutable_data(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); @@ -230,8 +228,8 @@ class ROIAlignGradOpMLUKernel : public framework::OpKernel { T* rois_cpu_ptr = rois_cpu.mutable_data(platform::CPUPlace()); // boxes; [batch_idx, x1, y1, x2, y2] - Tensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); - Tensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor boxes_cpu(framework::TransToPhiDataType(VT::FP32)); + phi::DenseTensor boxes_mlu(framework::TransToPhiDataType(VT::FP32)); boxes_cpu.Resize({rois_num, 5}); boxes_mlu.Resize({rois_num, 5}); T* boxes_cpu_ptr = boxes_cpu.mutable_data(platform::CPUPlace()); @@ -250,8 +248,8 @@ class ROIAlignGradOpMLUKernel : public framework::OpKernel { const std::vector perm_to_nhwc = {0, 2, 3, 1}; const std::vector perm_to_nchw = {0, 3, 1, 2}; - Tensor grads_nhwc(out_grad->type()); - Tensor grads_image_nhwc(in_grad->type()); + phi::DenseTensor grads_nhwc(out_grad->type()); + phi::DenseTensor grads_image_nhwc(in_grad->type()); TransposeFromMLUTensor(ctx, perm_to_nhwc, out_grad, diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc index 72578ca0177c0..06be3f35b3f23 100644 --- a/paddle/fluid/operators/roi_align_op_npu.cc +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -15,7 +15,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ROIAlignNPUKernel : public framework::OpKernel { @@ -54,7 +53,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { int dtype = static_cast(ConvertToNpuDtype(framework::proto::VarType::FP32)); framework::NPUAttributeMap attr_cast = {{"dst_type", dtype}}; - Tensor ROIsNum_fp(ROIs->dtype()); + phi::DenseTensor ROIsNum_fp(ROIs->dtype()); ROIsNum_fp.Resize(phi::make_ddim({ROIs->dims()[0], 1})); ROIsNum_fp.mutable_data(ctx.GetPlace()); @@ -68,7 +67,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { x_list.push_back(*ROIs); auto axis = 1; // output of concate - Tensor ROIs_N5(ROIs->dtype()); + phi::DenseTensor ROIs_N5(ROIs->dtype()); ROIs_N5.Resize(phi::make_ddim({ROIs->dims()[0], 5})); ROIs_N5.mutable_data(ctx.GetPlace()); @@ -137,9 +136,9 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { // Cast RoisNum to fp32 tensor auto* RoisNum = ctx.Input("RoisNum"); - Tensor ROIs_N5; + phi::DenseTensor ROIs_N5; ROIs_N5.mutable_data({rois_num, 5}, place); - Tensor ROIsNum_fp; + phi::DenseTensor ROIsNum_fp; ROIsNum_fp.mutable_data(RoisNum->dims(), place); // shape = [rois_num] int nputype_fp32 = static_cast(ConvertToNpuDtype(framework::proto::VarType::FP32)); @@ -161,7 +160,7 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { // function #if (CANN_VERSION_CODE < 504000) std::vector vec_dlt = {0, 0, 0, -1.0f, -1.0f}; - Tensor tsr_dlt; + phi::DenseTensor tsr_dlt; tsr_dlt.mutable_data({5}, place); framework::TensorFromVector(vec_dlt, ctx.device_context(), &tsr_dlt); ctx.template device_context().Wait(); diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index b2e8a6ae58883..e79975e6254eb 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -23,8 +23,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class ROIPoolOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu index 8871627b85242..7d61088dd9fd6 100644 --- a/paddle/fluid/operators/sample_logits_op.cu +++ b/paddle/fluid/operators/sample_logits_op.cu @@ -112,7 +112,6 @@ __global__ void gpu_compute_remove_accidental_hits(const int size, template class SampleLogitsCUDAKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { // get necessary inputs const phi::DenseTensor* logits = context.Input("Logits"); @@ -165,16 +164,17 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { context.Input("CustomizedSamples"); const phi::DenseTensor* customized_probabilities = context.Input("CustomizedProbabilities"); - PADDLE_ENFORCE_EQ(customized_samples, - samples, - platform::errors::InvalidArgument( - "CustomizedSamples must be the same Tensor with " - "Samples when use_customized_samples = True")); + PADDLE_ENFORCE_EQ( + customized_samples, + samples, + platform::errors::InvalidArgument( + "CustomizedSamples must be the same phi::DenseTensor with " + "Samples when use_customized_samples = True")); PADDLE_ENFORCE_EQ( customized_probabilities, probabilities, platform::errors::InvalidArgument( - "CustomizedProbabilities must be the same Tensor with " + "CustomizedProbabilities must be the same phi::DenseTensor with " "Probabilities when use_customized_samples = True")); } else { samples->mutable_data(context.GetPlace()); @@ -238,7 +238,6 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { template class SampleLogitsGradCUDAKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { auto logits_grad = context.Output(framework::GradVarName("Logits")); diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h index 584d115d28ff3..fe53a12e5ed71 100644 --- a/paddle/fluid/operators/sample_logits_op.h +++ b/paddle/fluid/operators/sample_logits_op.h @@ -27,7 +27,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template @@ -212,7 +211,6 @@ static void compute_remove_accidental_hits(const platform::DeviceContext& ctx, template class SampleLogitsKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE_EQ( platform::is_cpu_place(context.GetPlace()), @@ -264,16 +262,17 @@ class SampleLogitsKernel : public framework::OpKernel { context.Input("CustomizedSamples"); const phi::DenseTensor* customized_probabilities = context.Input("CustomizedProbabilities"); - PADDLE_ENFORCE_EQ(customized_samples, - samples, - platform::errors::InvalidArgument( - "CustomizedSamples must be the same Tensor with " - "Samples when use_customized_samples = True")); + PADDLE_ENFORCE_EQ( + customized_samples, + samples, + platform::errors::InvalidArgument( + "CustomizedSamples must be the same phi::DenseTensor with " + "Samples when use_customized_samples = True")); PADDLE_ENFORCE_EQ( customized_probabilities, probabilities, platform::errors::InvalidArgument( - "CustomizedProbabilities must be the same Tensor with " + "CustomizedProbabilities must be the same phi::DenseTensor with " "Probabilities when use_customized_samples = True")); } else { samples->mutable_data(context.GetPlace()); @@ -308,7 +307,6 @@ class SampleLogitsKernel : public framework::OpKernel { template class SampleLogitsGradKernel : public framework::OpKernel { public: - using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { auto logits_grad = context.Output(framework::GradVarName("Logits")); diff --git a/paddle/fluid/operators/sampling_id_op.cc b/paddle/fluid/operators/sampling_id_op.cc index 6d2d3f4a60047..7e84077fd60ae 100644 --- a/paddle/fluid/operators/sampling_id_op.cc +++ b/paddle/fluid/operators/sampling_id_op.cc @@ -17,8 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SamplingIdOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index 43c0bdcf4043e..e5c4f744db4a5 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -27,8 +27,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SamplingIdKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc index 41780561144b1..71f78911456f7 100644 --- a/paddle/fluid/operators/save_combine_op.cc +++ b/paddle/fluid/operators/save_combine_op.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SaveCombineOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/scatter_op_mlu.cc b/paddle/fluid/operators/scatter_op_mlu.cc index a4cb5d7424936..83cbbbd7b9e69 100644 --- a/paddle/fluid/operators/scatter_op_mlu.cc +++ b/paddle/fluid/operators/scatter_op_mlu.cc @@ -42,7 +42,7 @@ class ScatterMLUKernel : public framework::OpKernel { GetBasePtr(indices), mode); } else { - Tensor tensor_zeros(updates->type()); + phi::DenseTensor tensor_zeros(updates->type()); tensor_zeros.mutable_data(updates->dims(), ctx.GetPlace()); MLUCnnlTensorDesc tensor_zeros_desc(tensor_zeros); float value = 0.0; diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc index 6bffd24734055..ded722c7eb794 100644 --- a/paddle/fluid/operators/scatter_op_npu.cc +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ScatterNPUKernel : public framework::OpKernel { public: @@ -49,16 +47,16 @@ class ScatterNPUKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); - auto op_func_update = [](const std::vector& inputs, - const std::vector& outputs, + auto op_func_update = [](const std::vector& inputs, + const std::vector& outputs, const NPUAttributeMap& attrs, const platform::NPUDeviceContext& dev_ctx) { const auto& runner = NpuOpRunner("TensorScatterUpdate", inputs, outputs, attrs); runner.Run(dev_ctx.stream()); }; - auto op_func_add = [](const std::vector& inputs, - const std::vector& outputs, + auto op_func_add = [](const std::vector& inputs, + const std::vector& outputs, const NPUAttributeMap& attrs, const platform::NPUDeviceContext& dev_ctx) { const auto& runner = diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index 34728c86c56b6..15f87803f5ab8 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -28,7 +28,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoD = framework::LoD; template diff --git a/paddle/fluid/operators/seed_op.cc b/paddle/fluid/operators/seed_op.cc index 88a1884ae53e4..93d57aedd8aff 100644 --- a/paddle/fluid/operators/seed_op.cc +++ b/paddle/fluid/operators/seed_op.cc @@ -17,7 +17,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; class SeedOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/seed_op.h b/paddle/fluid/operators/seed_op.h index a1c3484b7a728..c3cbc16fb4884 100644 --- a/paddle/fluid/operators/seed_op.h +++ b/paddle/fluid/operators/seed_op.h @@ -19,7 +19,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; static int get_seed(const framework::ExecutionContext& context) { int user_seed = context.Attr("seed"); diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index 86049bec1eb21..a41b0f5f2b996 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -36,8 +36,6 @@ class OpBase; namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SetValue : public framework::OperatorWithKernel { public: SetValue(const std::string &type, @@ -55,7 +53,7 @@ class SetValue : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensorList" || var_name == "EndsTensorList" || var_name == "StepsTensorList") { @@ -70,24 +68,28 @@ class SetValueMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { // Input - AddInput("Input", "(Tensor) Input tensor of set_value operator."); - AddInput("ValueTensor", "(Tensor) Value tensor of set_value operator.") + AddInput("Input", "(phi::DenseTensor) Input tensor of set_value operator."); + AddInput("ValueTensor", + "(phi::DenseTensor) Value tensor of set_value operator.") .AsDispensable(); AddInput("StartsTensorList", - "(vector>, optional) If provided, set_value will " + "(vector>, optional) If provided, " + "set_value will " "use this. The shape of the tensor in vector must be [1]." "It has higher priority compare with attr(starts).") .AsDuplicable() .AsDispensable(); AddInput("EndsTensorList", - "(vector>, optional) If provided, set_value will " + "(vector>, optional) If provided, " + "set_value will " "use this. The shape of the tensor in vector must BE [1]." "It has higher priority compare with attr(ends).") .AsDuplicable() .AsDispensable(); AddInput("StepsTensorList", - "(vector>, optional) If provided, set_value will " + "(vector>, optional) If provided, " + "set_value will " "use this. The shape of the tensor in vector must BE [1]." "It has higher priority compare with attr(steps).") .AsDuplicable() @@ -95,8 +97,9 @@ class SetValueMaker : public framework::OpProtoAndCheckerMaker { // Output AddOutput("Out", - "(Tensor) Output tensor of set_value operator. The output is the " - "same Tensor as input"); + "(phi::DenseTensor) Output tensor of set_value operator. 
The " + "output is the " + "same phi::DenseTensor as input"); // Attr AddAttr("dtype", "data type of input.") @@ -142,7 +145,7 @@ class SetValueMaker : public framework::OpProtoAndCheckerMaker { AddAttr>("shape", "(vector) Shape of values.") .SetDefault({}); AddComment(R"DOC(SetValue operator. -Assignment to a Tensor in static mode. +Assignment to a phi::DenseTensor in static mode. )DOC"); } }; @@ -220,7 +223,7 @@ class SetValueGrad : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensorList" || var_name == "EndsTensorList" || var_name == "StepsTensorList") { diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index 7ef766020251b..d4ed1ce586e8f 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -31,7 +31,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; inline std::string GetValueName(framework::proto::VarType::Type data_type) { diff --git a/paddle/fluid/operators/set_value_op_mlu.cc b/paddle/fluid/operators/set_value_op_mlu.cc index 1b950a6da6084..06369b83bbab9 100644 --- a/paddle/fluid/operators/set_value_op_mlu.cc +++ b/paddle/fluid/operators/set_value_op_mlu.cc @@ -102,7 +102,7 @@ class SetValueMLUKernel : public framework::OpKernel { ends_indices[axis_index] = static_cast(ends[i]); strides_indices[axis_index] = static_cast(steps[i]); } - Tensor value_t(in->type()); + phi::DenseTensor value_t(in->type()); if (value_tensor != nullptr) { value_t.ShareDataWith(*value_tensor); } else { @@ -116,7 +116,7 @@ class SetValueMLUKernel : public framework::OpKernel { value_t.Resize(value_dims); } - Tensor value_temp(in->type()); + phi::DenseTensor value_temp(in->type()); if (slice_dims_for_assign == value_t.dims()) { value_temp.ShareDataWith(value_t); } else { @@ -133,7 +133,7 @@ class SetValueMLUKernel : public framework::OpKernel { int64_t input_numel = phi::product(in_dims); int64_t value_numel = phi::product(value_temp.dims()); - Tensor in_temp, out_temp, val_temp, index_out; + phi::DenseTensor in_temp, out_temp, val_temp, index_out; int64_t stride_step = phi::product(in_dims); std::vector index_indices(stride_step); std::iota(index_indices.begin(), index_indices.end(), 0); @@ -185,7 +185,7 @@ class SetValueMLUKernel : public framework::OpKernel { phi::product(slice_dims_for_assign), platform::errors::InvalidArgument( "OP(set_value) error index indices and value update not match ")); - Tensor index_final; + phi::DenseTensor index_final; index_final.ShareDataWith(index_out); int64_t indices_numel = phi::product(index_dims); auto new_index_dims = phi::make_ddim({indices_numel}); diff --git a/paddle/fluid/operators/set_value_op_npu.cc b/paddle/fluid/operators/set_value_op_npu.cc index 7526b13311b05..9dde6c6fbb3c0 100644 --- a/paddle/fluid/operators/set_value_op_npu.cc +++ b/paddle/fluid/operators/set_value_op_npu.cc @@ -132,7 +132,7 @@ class SetValueNPUKernel : public framework::OpKernel { platform::errors::InvalidArgument( "OP(set_value) error index indices and value update not match ")); - Tensor value_t(in->type()); + phi::DenseTensor value_t(in->type()); if (value_tensor != nullptr) { value_t.ShareDataWith(*value_tensor); } else { @@ -148,7 +148,7 @@ class SetValueNPUKernel : public framework::OpKernel { auto stream = ctx.template 
device_context().stream(); - Tensor value_temp(in->type()); + phi::DenseTensor value_temp(in->type()); if (slice_dims_for_assign == value_t.dims()) { value_temp.ShareDataWith(value_t); } else { @@ -165,7 +165,7 @@ class SetValueNPUKernel : public framework::OpKernel { int64_t input_numel = phi::product(in_dims); int64_t index_numel = index_indices.size(); - Tensor in_temp, out_temp, val_temp; + phi::DenseTensor in_temp, out_temp, val_temp; in_temp.ShareDataWith(*in); out_temp.ShareDataWith(*out); val_temp.ShareDataWith(value_temp); diff --git a/paddle/fluid/operators/shape_op_mlu.cc b/paddle/fluid/operators/shape_op_mlu.cc index bd51b49851840..f69a202819935 100644 --- a/paddle/fluid/operators/shape_op_mlu.cc +++ b/paddle/fluid/operators/shape_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template @@ -39,7 +38,7 @@ class ShapeMLUKernel : public framework::OpKernel { out_t->mutable_data(ctx.GetPlace()); // shape op cpu - Tensor shape_on_cpu( + phi::DenseTensor shape_on_cpu( framework::TransToPhiDataType(framework::proto::VarType::INT32)); shape_on_cpu.Resize({in_dims.size()}); auto cpu_data = shape_on_cpu.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/shape_op_npu.cc b/paddle/fluid/operators/shape_op_npu.cc index 60a0162818c9d..f66ae5dc750fe 100644 --- a/paddle/fluid/operators/shape_op_npu.cc +++ b/paddle/fluid/operators/shape_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class ShapeNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/shard_index_op_npu.cc b/paddle/fluid/operators/shard_index_op_npu.cc index 3cc025ca9ed64..488615f66325e 100644 --- a/paddle/fluid/operators/shard_index_op_npu.cc +++ b/paddle/fluid/operators/shard_index_op_npu.cc @@ -18,7 +18,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class ShardIndexNPUKernel : public framework::OpKernel { public: @@ -67,17 +66,17 @@ class ShardIndexNPUKernel : public framework::OpKernel { out->set_lod(in->lod()); out->mutable_data(place); - Tensor tmp(in->type()); + phi::DenseTensor tmp(in->type()); tmp.mutable_data(framework::DDim({1}), place); FillNpuTensorWithConstant(&tmp, shard_size); - Tensor condition(experimental::DataType::BOOL); + phi::DenseTensor condition(experimental::DataType::BOOL); condition.mutable_data(in->dims(), place); - Tensor tmp2(in->type()); + phi::DenseTensor tmp2(in->type()); tmp2.mutable_data(in->dims(), place); - Tensor tmp3(in->type()); + phi::DenseTensor tmp3(in->type()); tmp3.mutable_data(in->dims(), place); auto stream = @@ -103,7 +102,7 @@ class ShardIndexNPUKernel : public framework::OpKernel { runner2.SetType("Equal"); runner2.Run(stream); - Tensor tmp4(in->type()); + phi::DenseTensor tmp4(in->type()); tmp4.mutable_data(in->dims(), place); FillNpuTensorWithConstant(&tmp4, ignore_value); tmp4.Resize(in->dims()); diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h index 2f1fbee16e3d9..4bc1289bf468c 100644 --- a/paddle/fluid/operators/shuffle_batch_op.h +++ b/paddle/fluid/operators/shuffle_batch_op.h @@ -32,7 +32,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template using Vector = framework::Vector; diff --git a/paddle/fluid/operators/shuffle_channel_op.cu 
b/paddle/fluid/operators/shuffle_channel_op.cu index 4869a4c6c5e22..6aa59becb1d6f 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cu +++ b/paddle/fluid/operators/shuffle_channel_op.cu @@ -16,7 +16,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaximumNumBlocks = 4096; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc index d77724281327c..431a36d414c99 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; const int kIgnoreIndex = -100; void CheckAttrs(const framework::ExecutionContext& ctx) { diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc index ea3f119a05a91..df4270b6f23bc 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; const int kIgnoreIndex = -100; void CheckAttrs(const framework::ExecutionContext& ctx) { diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h index 8c055c2323c84..e706da9e01419 100644 --- a/paddle/fluid/operators/similarity_focus_op.h +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class SimilarityFocusKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index d6f48d334759d..a418719907872 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SliceOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -181,7 +179,7 @@ class SliceOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensor" || var_name == "EndsTensor") { return expected_kernel_type; @@ -349,7 +347,7 @@ class SliceOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensor" || var_name == "EndsTensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/slice_op_mlu.cc b/paddle/fluid/operators/slice_op_mlu.cc index 1935e2d0c9b14..771fca6a5ef18 100644 --- a/paddle/fluid/operators/slice_op_mlu.cc +++ b/paddle/fluid/operators/slice_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SliceMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc index 13ad263575698..59d6e2c2e42c1 100644 --- a/paddle/fluid/operators/slice_op_npu.cc +++ b/paddle/fluid/operators/slice_op_npu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; void UpdateAttr(const framework::DDim& in_dims, @@ -199,7 +198,7 @@ class SliceGradNPUKernel : public framework::OpKernel { paddings[i][1] = static_cast(in_dims[i] - size[i] - offsets[i]); } - Tensor tmp_dout; + phi::DenseTensor tmp_dout; tmp_dout.ShareDataWith(*dout); auto out_dims = dout->dims(); auto decrease_axis = ctx.Attr>("decrease_axis"); diff --git a/paddle/fluid/operators/smooth_l1_loss_op.h b/paddle/fluid/operators/smooth_l1_loss_op.h index 3cc565ef91203..e11f629d86dda 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op.h +++ b/paddle/fluid/operators/smooth_l1_loss_op.h @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template @@ -79,7 +78,7 @@ class SmoothL1LossKernel : public framework::OpKernel { } auto in_counts = in0->numel(); - Tensor ptensor_errors; + phi::DenseTensor ptensor_errors; ptensor_errors.mutable_data({static_cast(in_counts)}, context.GetPlace()); auto errors = EigenVector::Flatten(ptensor_errors); @@ -138,7 +137,7 @@ class SmoothL1LossGradKernel : public framework::OpKernel { auto mat_dims = phi::make_ddim({static_cast(in_dims[0]), static_cast(cols)}); - Tensor ptensor_diff; + phi::DenseTensor ptensor_diff; ptensor_diff.mutable_data({static_cast(counts)}, context.GetPlace()); auto diff = EigenVector::Flatten(ptensor_diff); @@ -147,7 +146,7 @@ class SmoothL1LossGradKernel : public framework::OpKernel { SmoothL1LossBackward(sigma2)); // compute weights - Tensor ptensor_weights; + phi::DenseTensor ptensor_weights; ptensor_weights.mutable_data(mat_dims, context.GetPlace()); auto weights = EigenMatrix::From(ptensor_weights); // initialize to 1.0 diff --git a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc index 1a4fb14bbb0b6..811a016c6515c 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc @@ -42,12 +42,12 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { const auto& runner1 = NpuOpRunner("Sub", {*in_x, *in_y}, {*out_diff}, {}); runner1.Run(stream); - Tensor no_reduce_loss(in_x->dtype()); + phi::DenseTensor no_reduce_loss(in_x->dtype()); no_reduce_loss.Resize(in_x->dims()); no_reduce_loss.mutable_data(context.GetPlace()); // multiply inside weight before get the loss if (has_weight) { - Tensor tmp_diff(out_diff->dtype()); + phi::DenseTensor tmp_diff(out_diff->dtype()); tmp_diff.Resize(out_diff->dims()); tmp_diff.mutable_data(context.GetPlace()); const auto& runner2 = @@ -59,11 +59,11 @@ class SmoothL1LossNPUKernel : public framework::OpKernel { context.template device_context(), out_diff); - Tensor tmp_x(in_x->dtype()); + phi::DenseTensor tmp_x(in_x->dtype()); tmp_x.Resize(in_x->dims()); tmp_x.mutable_data(context.GetPlace()); - Tensor tmp_y(in_y->dtype()); + phi::DenseTensor tmp_y(in_y->dtype()); tmp_y.Resize(in_y->dims()); tmp_y.mutable_data(context.GetPlace()); @@ -90,7 +90,7 @@ class SmoothL1LossNPUKernel : public 
framework::OpKernel { // multiply outside weight and loss // reduceSum because the output'shape must be [B,1] if (has_weight) { - Tensor tmp_loss(no_reduce_loss.dtype()); + phi::DenseTensor tmp_loss(no_reduce_loss.dtype()); tmp_loss.Resize(no_reduce_loss.dims()); tmp_loss.mutable_data(context.GetPlace()); const auto& runner4 = @@ -134,13 +134,13 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { .stream(); // diff == in_x - in_y == diff - 0 - Tensor tmp_zero(diff->dtype()); + phi::DenseTensor tmp_zero(diff->dtype()); tmp_zero.Resize(diff->dims()); tmp_zero.mutable_data(context.GetPlace()); const auto& runner_zero = NpuOpRunner("ZerosLike", {*diff}, {tmp_zero}, {}); runner_zero.Run(stream); - Tensor grad(diff->dtype()); + phi::DenseTensor grad(diff->dtype()); grad.Resize(diff->dims()); grad.mutable_data(context.GetPlace()); // broadcast og(output_grad) to adapt to the npu interface @@ -151,7 +151,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { {{"shape", phi::vectorize(diff->dims())}}); runner_broad.Run(stream); - Tensor gradient(diff->dtype()); + phi::DenseTensor gradient(diff->dtype()); gradient.Resize(diff->dims()); gradient.mutable_data(context.GetPlace()); // diff == diff - 0 == in_x - in_y @@ -163,14 +163,14 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // mul weight and gradient if (has_weight) { - Tensor weight(inside_weight->dtype()); + phi::DenseTensor weight(inside_weight->dtype()); weight.Resize(inside_weight->dims()); weight.mutable_data(context.GetPlace()); const auto& runner_weight = NpuOpRunner("Mul", {*inside_weight, *outside_weight}, {weight}, {}); runner_weight.Run(stream); - Tensor tmp_grad(gradient.dtype()); + phi::DenseTensor tmp_grad(gradient.dtype()); tmp_grad.Resize(gradient.dims()); tmp_grad.mutable_data(context.GetPlace()); const auto& runner_weight_grad = @@ -196,7 +196,7 @@ class SmoothL1LossGradNPUKernel : public framework::OpKernel { // outy_grad = - gradient if (outy_grad) { outy_grad->mutable_data(context.GetPlace()); - Tensor coeff(experimental::DataType::FLOAT32); + phi::DenseTensor coeff(experimental::DataType::FLOAT32); coeff.mutable_data({1}, context.GetPlace()); FillNpuTensorWithConstant(&coeff, -1); const auto& runner_y_grad = diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc index 91333b3393000..87d788b478367 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SoftmaxWithCrossEntropyMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc index d42f993f46219..6a51198e75460 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc @@ -24,8 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SoftmaxWithCrossEntropyNPUKernel : public framework::OpKernel { public: @@ -61,7 +59,7 @@ class SoftmaxWithCrossEntropyNPUKernel : public framework::OpKernel { backprop->mutable_data(ctx.GetPlace()); softmax->mutable_data(ctx.GetPlace()); - Tensor logits_2d, labels_1d, loss_1d, backprop_2d, softmax_2d; + phi::DenseTensor logits_2d, labels_1d, loss_1d, backprop_2d, softmax_2d; logits_2d.ShareDataWith(*logits).Resize({n, d}); labels_1d.ShareDataWith(*labels).Resize({n}); loss_1d.ShareDataWith(*loss).Resize({n}); @@ -110,7 +108,7 @@ class SoftmaxWithCrossEntropyGradNPUKernel : public framework::OpKernel { const int n = phi::funcs::SizeToAxis(axis, logits_grad->dims()); const int d = phi::funcs::SizeFromAxis(axis, logits_grad->dims()); - Tensor logits_grad_2d, loss_grad_1d, backprop_2d; + phi::DenseTensor logits_grad_2d, loss_grad_1d, backprop_2d; logits_grad_2d.ShareDataWith(*logits_grad).Resize({n, d}); loss_grad_1d.ShareDataWith(*loss_grad).Resize({n}); diff --git a/paddle/fluid/operators/space_to_depth_op.cc b/paddle/fluid/operators/space_to_depth_op.cc index 6cc8d0f79be4e..0d4af9c0ce94a 100644 --- a/paddle/fluid/operators/space_to_depth_op.cc +++ b/paddle/fluid/operators/space_to_depth_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class SpaceToDepthOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index b03a0b6c84e71..c0ad3c9a57823 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -203,7 +203,6 @@ __global__ void BlockSparseSoftmaxBackward(T* dst, } } -using Tensor = phi::DenseTensor; /* input: sparse C in CSR format (num_rows,num_rows) output: sparse C after softmax operation @@ -641,7 +640,7 @@ void DotDsd(const phi::GPUContext& ctx, platform::dynload::cusparseDestroy(handle); } -std::vector GetSplitTensor(phi::DenseTensor* input) { +std::vector GetSplitTensor(phi::DenseTensor* input) { auto dims = input->dims(); int batch_size = dims[0]; int num_heads = dims[1]; @@ -687,14 +686,16 @@ class SparseAttentionCUDAKernel : public framework::OpKernel { int M = query_dims[2]; int N = query_dims[3]; - std::vector query_lists = GetSplitTensor(&query); - std::vector key_lists = GetSplitTensor(&key); - std::vector value_lists = GetSplitTensor(&value); - std::vector offset_lists = GetSplitTensor(&offset); - std::vector columns_lists = GetSplitTensor(&columns); - std::vector result_sdd_lists = GetSplitTensor(&result_sdd); - std::vector result_softmax_lists = GetSplitTensor(&result_softmax); - std::vector output_lists = GetSplitTensor(&output); + std::vector query_lists = GetSplitTensor(&query); + std::vector key_lists = GetSplitTensor(&key); + std::vector value_lists = GetSplitTensor(&value); + std::vector offset_lists = GetSplitTensor(&offset); + std::vector columns_lists = GetSplitTensor(&columns); + std::vector result_sdd_lists = + GetSplitTensor(&result_sdd); + std::vector result_softmax_lists = + GetSplitTensor(&result_softmax); + std::vector output_lists = GetSplitTensor(&output); const auto& dev_ctx = ctx.cuda_device_context(); const int iter_num = batch_size * num_heads; @@ -802,17 +803,18 @@ class SparseAttentionGradCUDAKernel : public framework::OpKernel { int M = query_dims[2]; int 
N = query_dims[3]; - std::vector query_lists = GetSplitTensor(&query); - std::vector key_lists = GetSplitTensor(&key); - std::vector value_lists = GetSplitTensor(&value); - std::vector offset_lists = GetSplitTensor(&offset); - std::vector columns_lists = GetSplitTensor(&columns); - std::vector sparse_dot_sdd_lists = GetSplitTensor(&sparse_dot_sdd); - std::vector softmax_lists = GetSplitTensor(&softmax); - std::vector dout_lists = GetSplitTensor(&dout); - std::vector dquery_lists = GetSplitTensor(&dquery); - std::vector dkey_lists = GetSplitTensor(&dkey); - std::vector dvalue_lists = GetSplitTensor(&dvalue); + std::vector query_lists = GetSplitTensor(&query); + std::vector key_lists = GetSplitTensor(&key); + std::vector value_lists = GetSplitTensor(&value); + std::vector offset_lists = GetSplitTensor(&offset); + std::vector columns_lists = GetSplitTensor(&columns); + std::vector sparse_dot_sdd_lists = + GetSplitTensor(&sparse_dot_sdd); + std::vector softmax_lists = GetSplitTensor(&softmax); + std::vector dout_lists = GetSplitTensor(&dout); + std::vector dquery_lists = GetSplitTensor(&dquery); + std::vector dkey_lists = GetSplitTensor(&dkey); + std::vector dvalue_lists = GetSplitTensor(&dvalue); const int iter_num = batch_size * num_heads; const auto& dev_ctx = ctx.cuda_device_context(); @@ -831,7 +833,7 @@ class SparseAttentionGradCUDAKernel : public framework::OpKernel { // dSoftmax = dOut * transpose(Value) int nnz_num = columns_lists[i].numel(); - Tensor dsoftmax; + phi::DenseTensor dsoftmax; dsoftmax.Resize({nnz_num}); dsoftmax.mutable_data(ctx.GetPlace()); DotSdd(dev_ctx, @@ -846,7 +848,7 @@ class SparseAttentionGradCUDAKernel : public framework::OpKernel { true); // dSparseDotSdd = dSoftmax * softmax'(SparseDotSdd) - Tensor dsparse_dot_sdd; + phi::DenseTensor dsparse_dot_sdd; dsparse_dot_sdd.Resize({nnz_num}); dsparse_dot_sdd.mutable_data(ctx.GetPlace()); SparseSoftmaxBackward(dev_ctx, diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc index cda18720e7aba..77928c7efc8da 100644 --- a/paddle/fluid/operators/split_op_mlu.cc +++ b/paddle/fluid/operators/split_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SplitMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/split_op_npu.cc b/paddle/fluid/operators/split_op_npu.cc index 2fa8fa2a805eb..966f2ea6849b9 100644 --- a/paddle/fluid/operators/split_op_npu.cc +++ b/paddle/fluid/operators/split_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SplitNPUKernel : public framework::OpKernel { public: @@ -44,7 +42,7 @@ class SplitNPUKernel : public framework::OpKernel { "The SectionsTensorList is not supported on NPU now.")); } - std::vector outputs; + std::vector outputs; for (size_t j = 0; j < outs.size(); ++j) { outs[j]->mutable_data(ctx.GetPlace()); outputs.push_back(*outs[j]); diff --git a/paddle/fluid/operators/squared_l2_distance_op.h b/paddle/fluid/operators/squared_l2_distance_op.h index 1698c65fc47ac..f0838c4fad2de 100644 --- a/paddle/fluid/operators/squared_l2_distance_op.h +++ b/paddle/fluid/operators/squared_l2_distance_op.h @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SquaredL2DistanceKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc b/paddle/fluid/operators/squared_l2_norm_op_mlu.cc index fcd83b40875ec..0c558502ddf65 100644 --- a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc +++ b/paddle/fluid/operators/squared_l2_norm_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SquaredL2NormMLUKernel : public framework::OpKernel { public: @@ -82,7 +80,7 @@ class SquaredL2NormGradMLUKernel : public framework::OpKernel { auto place = context.GetPlace(); // broadcast out_grad - Tensor broadcasted_out_grad; + phi::DenseTensor broadcasted_out_grad; broadcasted_out_grad.mutable_data(x_grad->dims(), place); MLUCnnlTensorDesc broadcasted_out_grad_desc(broadcasted_out_grad); MLUCnnlTensorDesc out_grad_desc(*out_grad); @@ -93,7 +91,7 @@ class SquaredL2NormGradMLUKernel : public framework::OpKernel { GetBasePtr(&broadcasted_out_grad)); // mul x - Tensor tmp_x_grad; + phi::DenseTensor tmp_x_grad; tmp_x_grad.mutable_data(x_grad->dims(), place); MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc tmp_x_grad_desc(tmp_x_grad); diff --git a/paddle/fluid/operators/squared_l2_norm_op_npu.cc b/paddle/fluid/operators/squared_l2_norm_op_npu.cc index 25260ed4c1286..0cebf8e59d6a6 100644 --- a/paddle/fluid/operators/squared_l2_norm_op_npu.cc +++ b/paddle/fluid/operators/squared_l2_norm_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class SquaredL2NormNPUKernel : public framework::OpKernel { public: @@ -65,7 +63,7 @@ class SquaredL2NormGradNPUKernel : public framework::OpKernel { .stream(); // broadcast out_grad - Tensor broadcasted_out_grad; + phi::DenseTensor broadcasted_out_grad; broadcasted_out_grad.mutable_data(x_grad->dims(), place); const auto &broadcast_runner = NpuOpRunner("BroadcastToD", @@ -74,13 +72,13 @@ class SquaredL2NormGradNPUKernel : public framework::OpKernel { {{"shape", phi::vectorize(x_grad->dims())}}); broadcast_runner.Run(stream); // mul x - Tensor tmp_x_grad; + phi::DenseTensor tmp_x_grad; tmp_x_grad.mutable_data(x_grad->dims(), place); const auto &mul_x_runner = NpuOpRunner("Mul", {broadcasted_out_grad, *x}, {tmp_x_grad}, {}); mul_x_runner.Run(stream); // mul coefficient:2 - Tensor coefficient; + phi::DenseTensor coefficient; coefficient.mutable_data({1}, place); FillNpuTensorWithConstant(&coefficient, static_cast(2.0)); x_grad->mutable_data(place); diff --git a/paddle/fluid/operators/stack_op_mlu.cc b/paddle/fluid/operators/stack_op_mlu.cc index eeac200676f4a..16076a180a54e 100644 --- a/paddle/fluid/operators/stack_op_mlu.cc +++ b/paddle/fluid/operators/stack_op_mlu.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class StackMLUKernel : public framework::OpKernel { public: @@ -31,10 +29,10 @@ class StackMLUKernel : public framework::OpKernel { if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); - PADDLE_ENFORCE_GT( - num, - 0, - platform::errors::InvalidArgument("number of input Tensor <= 0")); + PADDLE_ENFORCE_GT(num, + 0, + platform::errors::InvalidArgument( + "number of input phi::DenseTensor <= 0")); std::vector x_descs; std::vector x_raw_descs; diff --git a/paddle/fluid/operators/stack_op_npu.cc b/paddle/fluid/operators/stack_op_npu.cc index 3b5c0b1dc0cb6..7919294f60c33 100644 --- a/paddle/fluid/operators/stack_op_npu.cc +++ b/paddle/fluid/operators/stack_op_npu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class StackNPUKernel : public framework::OpKernel { public: @@ -30,10 +28,10 @@ class StackNPUKernel : public framework::OpKernel { if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); - PADDLE_ENFORCE_GT( - num, - 0, - platform::errors::InvalidArgument("number of input Tensor <= 0")); + PADDLE_ENFORCE_GT(num, + 0, + platform::errors::InvalidArgument( + "number of input phi::DenseTensor <= 0")); auto stream = ctx.template device_context() @@ -61,10 +59,10 @@ class StackGradNPUKernel : public framework::OpKernel { if (axis < 0) axis += dy->dims().size(); int num = dy->dims()[axis]; - PADDLE_ENFORCE_GT( - num, - 0, - platform::errors::InvalidArgument("number of input Tensor <= 0")); + PADDLE_ENFORCE_GT(num, + 0, + platform::errors::InvalidArgument( + "number of input phi::DenseTensor <= 0")); auto stream = ctx.template device_context() diff --git a/paddle/fluid/operators/stft_op.h b/paddle/fluid/operators/stft_op.h index 23130f687e305..b0d6091ecd37b 100644 --- a/paddle/fluid/operators/stft_op.h +++ b/paddle/fluid/operators/stft_op.h @@ -27,8 +27,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class StftKernel : public framework::OpKernel { public: @@ -59,7 +57,7 @@ class StftKernel : public framework::OpKernel { std::vector axes = {1}; // Frame - Tensor frames; + phi::DenseTensor frames; framework::DDim frames_dims(out->dims()); frames_dims.at(axes.back()) = n_fft; frames.mutable_data(frames_dims, ctx.GetPlace()); @@ -73,7 +71,7 @@ class StftKernel : public framework::OpKernel { /*is_grad*/ false); // Window - Tensor frames_w; + phi::DenseTensor frames_w; frames_w.mutable_data(frames_dims, ctx.GetPlace()); ElementwiseComputeEx, DeviceContext, T>( ctx, &frames, window, axes.back(), MulFunctor(), &frames_w); @@ -93,7 +91,7 @@ class StftKernel : public framework::OpKernel { framework::DDim onesided_dims(out->dims()); const int64_t onesided_axis_size = out->dims().at(axes.back()) / 2 + 1; onesided_dims.at(axes.back()) = onesided_axis_size; - Tensor onesided_out; + phi::DenseTensor onesided_out; onesided_out.mutable_data(onesided_dims, ctx.GetPlace()); fft_r2c_func(dev_ctx, frames_w, &onesided_out, axes, normalization, true); phi::funcs::FFTFillConj( @@ -125,12 +123,12 @@ class StftGradKernel : public framework::OpKernel { const int seq_length = dx->dims()[dx_rank - 1]; std::vector axes = {1}; - Tensor d_frames_w; + phi::DenseTensor d_frames_w; framework::DDim d_frames_dims(dy->dims()); d_frames_dims.at(axes.back()) = n_fft; d_frames_w.mutable_data(d_frames_dims, ctx.GetPlace()); - Tensor complex_d_frames_w; + 
phi::DenseTensor complex_d_frames_w; complex_d_frames_w.mutable_data(d_frames_dims, ctx.GetPlace()); // dy -> d_frames_w @@ -146,7 +144,7 @@ class StftGradKernel : public framework::OpKernel { fft_c2c_func( dev_ctx, *dy, &complex_d_frames_w, axes, normalization, false); } else { - Tensor full_dy; + phi::DenseTensor full_dy; full_dy.mutable_data(d_frames_dims, ctx.GetPlace()); auto zero_length = static_cast(full_dy.dims().at(axes.back()) - dy->dims().at(axes.back())); @@ -163,7 +161,7 @@ class StftGradKernel : public framework::OpKernel { phi::RealKernel(dev_ctx, complex_d_frames_w, &d_frames_w); // d_frames_w -> d_frames - Tensor d_frames; + phi::DenseTensor d_frames; d_frames.mutable_data(d_frames_dims, ctx.GetPlace()); ElementwiseComputeEx, DeviceContext, T>( ctx, &d_frames_w, window, axes.back(), MulFunctor(), &d_frames); diff --git a/paddle/fluid/operators/strided_slice_op.cc b/paddle/fluid/operators/strided_slice_op.cc index a91b210f2dc7b..c08f214ab58bc 100644 --- a/paddle/fluid/operators/strided_slice_op.cc +++ b/paddle/fluid/operators/strided_slice_op.cc @@ -26,8 +26,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class StridedSliceOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -69,7 +67,7 @@ class StridedSliceOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensor" || var_name == "EndsTensor" || var_name == "StridesTensor") { @@ -174,7 +172,7 @@ class StridedSliceOpGrad : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "StartsTensor" || var_name == "EndsTensor" || var_name == "StridesTensor") { diff --git a/paddle/fluid/operators/strided_slice_op_mlu.cc b/paddle/fluid/operators/strided_slice_op_mlu.cc index 6caf1ad5ad15f..21eb47f187b00 100644 --- a/paddle/fluid/operators/strided_slice_op_mlu.cc +++ b/paddle/fluid/operators/strided_slice_op_mlu.cc @@ -20,7 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using Variable = framework::Variable; using LoDTensorArray = framework::LoDTensorArray; using DDim = framework::DDim; @@ -100,7 +99,7 @@ class StridedSliceMLUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(is_tensor_array, false, platform::errors::InvalidArgument( - "Tensor array as input is not supported.")); + "phi::DenseTensor array as input is not supported.")); int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: @@ -156,7 +155,7 @@ class StridedSliceMLUKernel : public framework::OpKernel { auto infer_flags = ctx.Attr>("infer_flags"); auto decrease_axis = ctx.Attr>("decrease_axis"); - // vector> + // vector> auto list_new_starts_tensor = ctx.MultiInput("StartsTensorList"); auto list_new_ends_tensor = @@ -164,7 +163,7 @@ class StridedSliceMLUKernel : public framework::OpKernel { auto list_new_strides_tensor = ctx.MultiInput("StridesTensorList"); - // Tensor + // phi::DenseTensor if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { @@ -268,7 +267,7 @@ class StridedSliceGradMLUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(is_tensor_array, false, platform::errors::InvalidArgument( - "Tensor array as input is not supported.")); + "phi::DenseTensor array as input is not supported.")); int rank = ctx.Input("Input")->dims().size(); switch (rank) { diff --git a/paddle/fluid/operators/strided_slice_op_npu.cc b/paddle/fluid/operators/strided_slice_op_npu.cc index f613dc1054088..23bf6ea689602 100644 --- a/paddle/fluid/operators/strided_slice_op_npu.cc +++ b/paddle/fluid/operators/strided_slice_op_npu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using Variable = framework::Variable; using LoDTensorArray = framework::LoDTensorArray; using DDim = framework::DDim; @@ -34,7 +33,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(is_tensor_array, false, platform::errors::InvalidArgument( - "Tensor array as input is not supported.")); + "phi::DenseTensor array as input is not supported.")); int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: @@ -87,7 +86,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { auto infer_flags = ctx.Attr>("infer_flags"); auto decrease_axis = ctx.Attr>("decrease_axis"); - // vector> + // vector> auto list_new_ends_tensor = ctx.MultiInput("EndsTensorList"); auto list_new_starts_tensor = @@ -95,7 +94,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { auto list_new_strides_tensor = ctx.MultiInput("StridesTensorList"); - // Tensor + // phi::DenseTensor if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { @@ -157,9 +156,9 @@ class StridedSliceNPUKernel : public framework::OpKernel { strides_indices_vector[axis_index] = strides[axis]; } - Tensor starts_indices_tensor; - Tensor ends_indices_tensor; - Tensor strides_indices_tensor; + phi::DenseTensor starts_indices_tensor; + phi::DenseTensor ends_indices_tensor; + phi::DenseTensor strides_indices_tensor; starts_indices_tensor.mutable_data({D}, place); ends_indices_tensor.mutable_data({D}, place); @@ -221,7 +220,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { runner.Run(stream); if (need_reverse) { - Tensor out_tmp; + phi::DenseTensor out_tmp; out_tmp.mutable_data(out_dims, place); 
paddle::framework::TensorCopy( *out, @@ -229,7 +228,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { ctx.template device_context(), &out_tmp); - Tensor reverse_axis; + phi::DenseTensor reverse_axis; std::vector reverse_axis_vector; for (size_t axis = 0; axis < axes.size(); axis++) { if (reverse_vector[axis] == 1) { @@ -261,7 +260,7 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(is_tensor_array, false, platform::errors::InvalidArgument( - "Tensor array as input is not supported.")); + "phi::DenseTensor array as input is not supported.")); int rank = ctx.Input("Input")->dims().size(); switch (rank) { @@ -378,9 +377,9 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { strides_indices_vector[axis_index] = strides[axis]; } - Tensor starts_indices_tensor; - Tensor ends_indices_tensor; - Tensor strides_indices_tensor; + phi::DenseTensor starts_indices_tensor; + phi::DenseTensor ends_indices_tensor; + phi::DenseTensor strides_indices_tensor; starts_indices_tensor.mutable_data({D}, place); ends_indices_tensor.mutable_data({D}, place); @@ -397,7 +396,7 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { for (int i = 0; i < input_dims.size(); i++) { input_dims_vector.push_back(input_dims[i]); } - Tensor input_dims_tensor; + phi::DenseTensor input_dims_tensor; paddle::framework::TensorFromVector( input_dims_vector, dev_ctx, &input_dims_tensor); @@ -417,7 +416,7 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { {"shrink_axis_mask", 0}}; if (need_reverse) { - Tensor reverse_axis; + phi::DenseTensor reverse_axis; std::vector reverse_axis_vector; for (size_t axis = 0; axis < axes.size(); axis++) { if (reverse_vector[axis] == 1) { @@ -429,7 +428,7 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { paddle::framework::TensorFromVector( reverse_axis_vector, dev_ctx, &reverse_axis); - Tensor dout_tmp; + phi::DenseTensor dout_tmp; dout_tmp.mutable_data(dout->dims(), place); const auto& runner_reverse = NpuOpRunner("ReverseV2", {*dout, reverse_axis}, {dout_tmp}); diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index aad62e9ce2c33..a2f69a394902c 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template @@ -62,7 +61,7 @@ class SumMLUKernel : public framework::OpKernel { } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) must be Tensor or But got " + "Expected type of Output(out) must be phi::DenseTensor or But got " "unsupport type: %s.", framework::ToTypeName(out_var->Type()))); } diff --git a/paddle/fluid/operators/sum_op_npu.cc b/paddle/fluid/operators/sum_op_npu.cc index 20cc7ec18b8b7..afc489e2ab412 100644 --- a/paddle/fluid/operators/sum_op_npu.cc +++ b/paddle/fluid/operators/sum_op_npu.cc @@ -23,7 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; template @@ -106,7 +105,7 @@ class SumNPUKernel : public framework::OpKernel { } } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) must be Tensor or " + "Expected type of Output(out) must be phi::DenseTensor or " "LoDTensorArray. 
But got " "unsupport type: %s.", framework::ToTypeName(out_var->Type()))); diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index d6c306ff2a9f3..6358722e94390 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -36,7 +36,6 @@ namespace paddle { namespace operators { namespace math { -using Tensor = phi::DenseTensor; using InTensors = std::vector; using OutTensors = std::vector; using OpName = std::string; @@ -274,8 +273,8 @@ template struct DeviceIndependenceTensorOperations { // 1. Device indenpendence, for kernel reuse. // 2. Input and output is always tensor type. - // 3. output Tensor is alway allocated - // 4. Basic Tensor operator is supported + // 3. output phi::DenseTensor is alway allocated + // 4. Basic phi::DenseTensor operator is supported // 5. The Reused Operator Kernel should only be considered as // a wrap function using NameInTensorMap = @@ -382,8 +381,8 @@ struct DeviceIndependenceTensorOperations { } // batch_diag for CPU only - Tensor BatchDiag(const phi::DenseTensor& x, int batch) { - Tensor out; + phi::DenseTensor BatchDiag(const phi::DenseTensor& x, int batch) { + phi::DenseTensor out; auto* x_data = x.data>(); auto numel = x.numel(); auto* out_data = out.mutable_data>( @@ -411,7 +410,8 @@ struct DeviceIndependenceTensorOperations { } // a complex number x times a real number y, which is represented as (a+0j) - Tensor RealMulComplex(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor RealMulComplex(const phi::DenseTensor& x, + const phi::DenseTensor& y) { phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); ret.Resize(phi::make_ddim(out_shape)); @@ -650,8 +650,8 @@ struct DeviceIndependenceTensorOperations { return CreateOpRunAndReturnTensor("concat", inputs, attrs, out_shape); } - Tensor Conj(const phi::DenseTensor& x) { - Tensor out; + phi::DenseTensor Conj(const phi::DenseTensor& x) { + phi::DenseTensor out; auto* out_data = out.mutable_data(x.dims(), context.GetPlace()); auto* x_data = x.data(); auto for_range = GetForRange(x.numel()); @@ -660,8 +660,8 @@ struct DeviceIndependenceTensorOperations { return out; } - Tensor Real(const phi::DenseTensor& x) { - Tensor out; + phi::DenseTensor Real(const phi::DenseTensor& x) { + phi::DenseTensor out; auto numel = x.numel(); auto* out_data = out.mutable_data>( x.dims(), @@ -674,13 +674,13 @@ struct DeviceIndependenceTensorOperations { return out; } - Tensor DiagFill(const int m, - const int n, - const int num_lower_diags, - const int num_upper_diags, - const phi::DenseTensor& scale, - const phi::DenseTensor& input) { - Tensor out; + phi::DenseTensor DiagFill(const int m, + const int n, + const int num_lower_diags, + const int num_upper_diags, + const phi::DenseTensor& scale, + const phi::DenseTensor& input) { + phi::DenseTensor out; auto& dev_ctx = context.template device_context(); platform::ForRange for_range(dev_ctx, input.numel()); DiagAndFillFunctor diag_and_copy_functor( @@ -709,7 +709,7 @@ struct DeviceIndependenceTensorOperations { const std::vector& start, const std::vector& end, phi::DenseTensor* out) { - // Slice by call Eigen Tensor Function `.slice()` + // Slice by call Eigen phi::DenseTensor Function `.slice()` size_t rank = in->dims().size(); PADDLE_ENFORCE_EQ(start.size(), rank, @@ -752,7 +752,7 @@ struct DeviceIndependenceTensorOperations { op_outputs[out_name].emplace_back("tmp_" + out_name); } auto out_var = local_scope.Var("tmp_Out"); // return the Out - // create Out Tensor and 
allocat memory + // create Out phi::DenseTensor and allocat memory out_var->GetMutable()->mutable_data( phi::make_ddim(out_shape), context.GetPlace()); // phi::make_ddim(out_shape) diff --git a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc index b1e6bec8a4cad..2d037a7c3ecc1 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc @@ -26,7 +26,6 @@ namespace operators { #define NO_USE_CNCL 0 #define GET_LAYOUT_OFFSET 2 -using Tensor = phi::DenseTensor; static std::vector supported_input_layout = { CNNL_LAYOUT_NC, CNNL_LAYOUT_NLC, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NDHWC}; @@ -81,8 +80,8 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { saved_mean->mutable_data(ctx.GetPlace()); saved_variance->mutable_data(ctx.GetPlace()); - Tensor trans_x; - Tensor trans_y; + phi::DenseTensor trans_x; + phi::DenseTensor trans_y; std::vector forward_perm; std::vector backward_perm; std::vector trans_shape; @@ -137,13 +136,13 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { } else { // training if (ctx.HasInput("MomentumTensor")) { const auto *mom_tensor = ctx.Input("MomentumTensor"); - Tensor mom_cpu; + phi::DenseTensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; } - Tensor local_mean, local_var; + phi::DenseTensor local_mean, local_var; local_mean.mutable_data(mean->dims(), ctx.GetPlace()); local_var.mutable_data(variance->dims(), ctx.GetPlace()); MLUCnnlTensorDesc desc_mean_var(*mean_out); @@ -158,14 +157,14 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { desc_mean_var.get(), GetBasePtr(&local_var)); - Tensor input_count; + phi::DenseTensor input_count; input_count.mutable_data(phi::make_ddim({1}), ctx.GetPlace()); FillMLUTensorWithHostValue( ctx, static_cast(x->numel() / C), &input_count); - Tensor count_all; - Tensor mean_all(mean->dtype()); - Tensor invstd_all(variance->dtype()); + phi::DenseTensor count_all; + phi::DenseTensor mean_all(mean->dtype()); + phi::DenseTensor invstd_all(variance->dtype()); #ifdef PADDLE_WITH_CNCL auto &dev_ctx = @@ -300,7 +299,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { const auto *saved_mean = ctx.Input("SavedMean"); const auto *saved_inv_var = ctx.Input("SavedVariance"); - const Tensor *x; + const phi::DenseTensor *x; if (ctx.HasInput("Y")) { PADDLE_ENFORCE_EQ(true, false, @@ -342,9 +341,9 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { "OP(sync_batch_norm) be (1), but given (%d).", scale->dims().size())); - Tensor trans_x; - Tensor trans_dy; - Tensor trans_dx; + phi::DenseTensor trans_x; + phi::DenseTensor trans_dy; + phi::DenseTensor trans_dx; std::vector forward_perm; std::vector backward_perm; std::vector trans_shape; @@ -384,7 +383,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { supported_input_layout[x_dims.size() - GET_LAYOUT_OFFSET], ToCnnlDataType()); - Tensor sum_dy, sum_dy_xmu; + phi::DenseTensor sum_dy, sum_dy_xmu; sum_dy.mutable_data(bias->dims(), ctx.GetPlace()); sum_dy_xmu.mutable_data(bias->dims(), ctx.GetPlace()); MLUCnnlTensorDesc desc_other_param(*bias); @@ -411,7 +410,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { d_scale ? true : false /*compute d_scale*/, d_bias ? 
true : false /*compute d_bias*/); - Tensor numel_count; + phi::DenseTensor numel_count; numel_count.mutable_data(phi::make_ddim({1}), ctx.GetPlace()); FillMLUTensorWithHostValue( ctx, static_cast(x->numel() / C), &numel_count); diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc b/paddle/fluid/operators/sync_batch_norm_op_npu.cc index 6cfd753c4ab6e..46b1ccc140ddb 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc @@ -20,8 +20,6 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template void training_or_inference(const framework::ExecutionContext &ctx, const aclrtStream &stream, @@ -34,18 +32,18 @@ void training_or_inference(const framework::ExecutionContext &ctx, const int &W, const float epsilon, const float &momentum, - const Tensor *common_mean, - const Tensor *common_var, - const Tensor *x, - const Tensor *scale, - const Tensor *bias, - const Tensor *mean, - const Tensor *variance, - Tensor *mean_out, - Tensor *variance_out, - Tensor *saved_mean, - Tensor *saved_variance, - Tensor *y) { + const phi::DenseTensor *common_mean, + const phi::DenseTensor *common_var, + const phi::DenseTensor *x, + const phi::DenseTensor *scale, + const phi::DenseTensor *bias, + const phi::DenseTensor *mean, + const phi::DenseTensor *variance, + phi::DenseTensor *mean_out, + phi::DenseTensor *variance_out, + phi::DenseTensor *saved_mean, + phi::DenseTensor *saved_variance, + phi::DenseTensor *y) { std::vector axes; if (layout == phi::DataLayout::kNCHW) { axes = {0, 2, 3}; @@ -59,7 +57,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, else if (layout == phi::DataLayout::kNHWC) multiples = {N, H, W, 1}; - Tensor common_mean_tile_1; + phi::DenseTensor common_mean_tile_1; { common_mean_tile_1.Resize({C}); common_mean_tile_1.mutable_data(place); @@ -70,7 +68,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, common_mean_tile_1.Resize({1, 1, 1, C}); } - Tensor common_mean_tile; + phi::DenseTensor common_mean_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; common_mean_tile.Resize(x->dims()); @@ -80,7 +78,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor common_var_tile_1; + phi::DenseTensor common_var_tile_1; { common_var_tile_1.Resize({C}); common_var_tile_1.mutable_data(place); @@ -91,7 +89,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, common_var_tile_1.Resize({1, 1, 1, C}); } - Tensor common_var_tile; + phi::DenseTensor common_var_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; common_var_tile.Resize(x->dims()); @@ -101,7 +99,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor common_var_tile_add_epsilon; + phi::DenseTensor common_var_tile_add_epsilon; { framework::NPUAttributeMap attr_input = {{"value", epsilon}}; common_var_tile_add_epsilon.Resize(x->dims()); @@ -111,7 +109,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor common_var_tile_add_epsilon_sqrt; + phi::DenseTensor common_var_tile_add_epsilon_sqrt; { common_var_tile_add_epsilon_sqrt.Resize(x->dims()); common_var_tile_add_epsilon_sqrt.mutable_data(place); @@ -122,7 +120,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor x_sub_common_mean; + phi::DenseTensor x_sub_common_mean; { 
x_sub_common_mean.Resize(x->dims()); x_sub_common_mean.mutable_data(place); @@ -131,7 +129,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor normalized; + phi::DenseTensor normalized; { normalized.Resize(x->dims()); normalized.mutable_data(place); @@ -143,7 +141,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor scale_tile_1; + phi::DenseTensor scale_tile_1; { scale_tile_1.Resize({C}); scale_tile_1.mutable_data(place); @@ -154,7 +152,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, scale_tile_1.Resize({1, 1, 1, C}); } - Tensor scale_tile; + phi::DenseTensor scale_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; scale_tile.Resize(x->dims()); @@ -164,7 +162,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor normalized_mul_scale; + phi::DenseTensor normalized_mul_scale; { normalized_mul_scale.Resize(x->dims()); normalized_mul_scale.mutable_data(place); @@ -173,7 +171,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor bias_tile_1; + phi::DenseTensor bias_tile_1; { bias_tile_1.Resize({C}); bias_tile_1.mutable_data(place); @@ -184,7 +182,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, bias_tile_1.Resize({1, 1, 1, C}); } - Tensor bias_tile; + phi::DenseTensor bias_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; bias_tile.Resize(x->dims()); @@ -203,7 +201,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, } if (!test_mode) { - Tensor ones; + phi::DenseTensor ones; { ones.Resize({C}); ones.mutable_data(place); @@ -212,7 +210,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, // cacl mean_out { - Tensor common_mean_mul_1_sub_momentum; + phi::DenseTensor common_mean_mul_1_sub_momentum; { framework::NPUAttributeMap attr_input = {{"value", 1 - momentum}}; common_mean_mul_1_sub_momentum.Resize({C}); @@ -224,7 +222,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor mean_mul_momentum; + phi::DenseTensor mean_mul_momentum; { framework::NPUAttributeMap attr_input = {{"value", momentum}}; mean_mul_momentum.Resize({C}); @@ -246,7 +244,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, // cacl variance_out { - Tensor momentum_mul_var; + phi::DenseTensor momentum_mul_var; { framework::NPUAttributeMap attr_input = {{"value", momentum}}; momentum_mul_var.Resize({C}); @@ -256,7 +254,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor var_ref_mul_1_sub_momentum; + phi::DenseTensor var_ref_mul_1_sub_momentum; { framework::NPUAttributeMap attr_input = {{"value", 1 - momentum}}; var_ref_mul_1_sub_momentum.Resize({C}); @@ -278,7 +276,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, // cacl saved_variance { - Tensor var_ref_add_epsilon; + phi::DenseTensor var_ref_add_epsilon; { framework::NPUAttributeMap attr_input = {{"value", epsilon}}; var_ref_add_epsilon.Resize({C}); @@ -288,7 +286,7 @@ void training_or_inference(const framework::ExecutionContext &ctx, runner.Run(stream); } - Tensor var_ref_add_epsilon_sqrt; + phi::DenseTensor var_ref_add_epsilon_sqrt; { var_ref_add_epsilon_sqrt.Resize({C}); var_ref_add_epsilon_sqrt.mutable_data(place); @@ -399,18 +397,18 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { } else { 
// training if (ctx.HasInput("MomentumTensor")) { const auto *mom_tensor = ctx.Input("MomentumTensor"); - Tensor mom_cpu; + phi::DenseTensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; } // cacl saved_mean and var_ref - Tensor var_ref; + phi::DenseTensor var_ref; var_ref.Resize({C}); var_ref.mutable_data(place); { - Tensor x_sum; + phi::DenseTensor x_sum; { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; @@ -421,7 +419,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { runner.Run(stream); } - Tensor x_square; + phi::DenseTensor x_square; { x_square.Resize(x->dims()); x_square.mutable_data(place); @@ -429,7 +427,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { runner.Run(stream); } - Tensor x_square_sum; + phi::DenseTensor x_square_sum; { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; @@ -447,7 +445,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { HcclDataType dtype = platform::ToHCCLDataType( framework::TransToProtoVarType(mean_out->dtype())); - Tensor device_count_tensor; + phi::DenseTensor device_count_tensor; { device_count_tensor.Resize({1}); device_count_tensor.mutable_data(place); @@ -517,7 +515,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { // cacl var_ref { - Tensor saved_mean_square; + phi::DenseTensor saved_mean_square; { saved_mean_square.Resize({C}); saved_mean_square.mutable_data(place); @@ -526,7 +524,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { runner.Run(stream); } - Tensor var_ref_tmp; + phi::DenseTensor var_ref_tmp; var_ref_tmp.Resize({C}); var_ref_tmp.mutable_data(place); { @@ -589,7 +587,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { auto *d_bias = ctx.Output(framework::GradVarName("Bias")); const auto *saved_mean = ctx.Input("SavedMean"); - const Tensor *x; + const phi::DenseTensor *x; if (ctx.HasInput("Y")) { PADDLE_ENFORCE_EQ(true, false, @@ -627,7 +625,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { float device_counts = 0.0; if (comm) { - Tensor device_count_tensor; + phi::DenseTensor device_count_tensor; { device_count_tensor.Resize({1}); device_count_tensor.mutable_data(place); @@ -660,13 +658,13 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { } // cacl var_ref - Tensor var_ref; + phi::DenseTensor var_ref; var_ref.Resize({C}); var_ref.mutable_data(place); { // cacl var_ref { - Tensor x_square; + phi::DenseTensor x_square; { x_square.Resize(x->dims()); x_square.mutable_data(place); @@ -674,7 +672,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor x_square_sum; + phi::DenseTensor x_square_sum; { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; @@ -685,7 +683,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor x_square_sum_mean; + phi::DenseTensor x_square_sum_mean; { framework::NPUAttributeMap attr_input = { {"value", 1.0f * C / x_numel}}; @@ -696,7 +694,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor mean_square; + phi::DenseTensor mean_square; { mean_square.Resize({C}); mean_square.mutable_data(place); @@ -714,7 +712,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { } } - Tensor saved_mean_tile_1; + phi::DenseTensor saved_mean_tile_1; { saved_mean_tile_1.Resize({C}); 
saved_mean_tile_1.mutable_data(place); @@ -725,7 +723,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { saved_mean_tile_1.Resize({1, 1, 1, C}); } - Tensor saved_mean_tile; + phi::DenseTensor saved_mean_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; saved_mean_tile.Resize(x->dims()); @@ -735,7 +733,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor x_sub_saved_mean; + phi::DenseTensor x_sub_saved_mean; { x_sub_saved_mean.Resize(x->dims()); x_sub_saved_mean.mutable_data(place); @@ -744,7 +742,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor var_ref_tile_1; + phi::DenseTensor var_ref_tile_1; { var_ref_tile_1.Resize({C}); var_ref_tile_1.mutable_data(place); @@ -755,7 +753,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { var_ref_tile_1.Resize({1, 1, 1, C}); } - Tensor var_ref_tile; + phi::DenseTensor var_ref_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; var_ref_tile.Resize(x->dims()); @@ -765,7 +763,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor var_ref_tile_add_epsilon; + phi::DenseTensor var_ref_tile_add_epsilon; { framework::NPUAttributeMap attr_input = {{"value", epsilon}}; var_ref_tile_add_epsilon.Resize(x->dims()); @@ -775,7 +773,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor var_ref_tile_add_epsilon_sqrt; + phi::DenseTensor var_ref_tile_add_epsilon_sqrt; { var_ref_tile_add_epsilon_sqrt.Resize(x->dims()); var_ref_tile_add_epsilon_sqrt.mutable_data(place); @@ -786,7 +784,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor dy_mul_x_sub_mean_for_scale; + phi::DenseTensor dy_mul_x_sub_mean_for_scale; { if (framework::TransToProtoVarType(d_y->dtype()) == framework::proto::VarType::FP16) { @@ -804,7 +802,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { } } - Tensor dy_mul_x_sub_mean; + phi::DenseTensor dy_mul_x_sub_mean; { if (framework::TransToProtoVarType(d_y->dtype()) == framework::proto::VarType::FP16) { @@ -849,7 +847,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // cacl d_x if (d_x) { - Tensor dy_mean; + phi::DenseTensor dy_mean; { if (framework::TransToProtoVarType(d_y->dtype()) == framework::proto::VarType::FP16) { @@ -896,7 +894,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { } } - Tensor dy_mean_tile_1; + phi::DenseTensor dy_mean_tile_1; { dy_mean_tile_1.Resize({C}); dy_mean_tile_1.mutable_data(place); @@ -907,7 +905,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { dy_mean_tile_1.Resize({1, 1, 1, C}); } - Tensor dy_mean_tile; + phi::DenseTensor dy_mean_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; dy_mean_tile.Resize(x->dims()); @@ -917,7 +915,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor dy_sub_dy_mean; + phi::DenseTensor dy_sub_dy_mean; { if (framework::TransToProtoVarType(d_y->dtype()) == framework::proto::VarType::FP16) { @@ -935,7 +933,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { } } - Tensor dy_mul_x_sub_mean_mean; + phi::DenseTensor dy_mul_x_sub_mean_mean; { framework::NPUAttributeMap attr_input = {{"keep_dims", false}, {"axes", axes}}; @@ -948,7 +946,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - 
Tensor dy_mul_x_sub_mean_mean_tile_1; + phi::DenseTensor dy_mul_x_sub_mean_mean_tile_1; { dy_mul_x_sub_mean_mean_tile_1.Resize({C}); dy_mul_x_sub_mean_mean_tile_1.mutable_data(place); @@ -960,7 +958,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { dy_mul_x_sub_mean_mean_tile_1.Resize({1, 1, 1, C}); } - Tensor dy_mul_x_sub_mean_mean_tile; + phi::DenseTensor dy_mul_x_sub_mean_mean_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; dy_mul_x_sub_mean_mean_tile.Resize(x->dims()); @@ -974,7 +972,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // (x - mean) * np.mean(dy * (x - mean), axis=axis) // x_sub_saved_mean * dy_mul_x_sub_mean_mean_tile - Tensor tmp1; + phi::DenseTensor tmp1; { tmp1.Resize(x->dims()); tmp1.mutable_data(place); @@ -986,7 +984,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // (x - mean) * np.mean(dy * (x - mean), axis=axis) / (var + epsilon) // tmp1 / (var + epsilon) // tmp1 / var_ref_tile_add_epsilon - Tensor tmp2; + phi::DenseTensor tmp2; { tmp2.Resize(x->dims()); tmp2.mutable_data(place); @@ -998,7 +996,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // dy - np.mean(dy, axis) - (x - mean) * np.mean(dy * (x - mean), axis) / // (var + epsilon) // dy_sub_dy_mean - tmp2 - Tensor tmp3; + phi::DenseTensor tmp3; { tmp3.Resize(x->dims()); tmp3.mutable_data(place); @@ -1007,7 +1005,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { runner.Run(stream); } - Tensor scale_tile_1; + phi::DenseTensor scale_tile_1; { scale_tile_1.Resize({C}); scale_tile_1.mutable_data(place); @@ -1018,7 +1016,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { scale_tile_1.Resize({1, 1, 1, C}); } - Tensor scale_tile; + phi::DenseTensor scale_tile; { framework::NPUAttributeMap attr_input = {{"multiples", multiples}}; scale_tile.Resize(x->dims()); @@ -1031,7 +1029,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // scale * (dy - np.mean(dy, axis) - (x - mean) * np.mean(dy * (x - mean), // axis) / (var + epsilon)) // scale * tmp3 - Tensor dx_1; + phi::DenseTensor dx_1; { dx_1.Resize(x->dims()); dx_1.mutable_data(place); @@ -1052,7 +1050,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { // cacl d_scale if (d_scale) { - Tensor d_scale_2; + phi::DenseTensor d_scale_2; { d_scale_2.Resize(x->dims()); d_scale_2.mutable_data(place); diff --git a/paddle/fluid/operators/take_along_axis_op_npu.cc b/paddle/fluid/operators/take_along_axis_op_npu.cc index d4f06e6446887..3eed4989bb7ea 100644 --- a/paddle/fluid/operators/take_along_axis_op_npu.cc +++ b/paddle/fluid/operators/take_along_axis_op_npu.cc @@ -22,8 +22,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class NPUTakeAlongAxisKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index 3f781ab65eeb8..b41453b849bc4 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -28,7 +28,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index d98680c574154..1ba0e2c66be8d 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -29,7 +29,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using Sampler = math::Sampler; using DDim = framework::DDim; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index f880181662e24..bad4479868053 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -201,15 +199,17 @@ class TeacherStudentSigmoidLossOpMaker public: void Make() override { AddInput("X", - "(Tensor, default Tensor), a 2-D tensor with shape [N x 1]," + "(phi::DenseTensor, default phi::DenseTensor), a 2-D " + "tensor with shape [N x 1]," " where N is the batch size and D is the output. " "This input is a probability computed by the previous operator, " "which is almost always the result of a softmax operator."); AddInput("Label", - "(Tensor), the ground truth which is a 2-D tensor. " - "Label is a Tensor with shape [N x 1]. "); + "(phi::DenseTensor), the ground truth which is a 2-D tensor. " + "Label is a phi::DenseTensor with shape [N x 1]. "); AddOutput("Y", - "(Tensor, default Tensor), a 2-D tensor with shape " + "(phi::DenseTensor, default phi::DenseTensor), a 2-D " + "tensor with shape " "[N x 1]. The teacher student sigmoid loss."); AddAttr( "soft_max_up_bound", diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h index 40bac8c364583..133d9656284f3 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h index 5ea2ead118892..ec2533316e107 100644 --- a/paddle/fluid/operators/temporal_shift_op.h +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -16,7 +16,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DataLayout = phi::DataLayout; template diff --git a/paddle/fluid/operators/tile_op_mlu.cc b/paddle/fluid/operators/tile_op_mlu.cc index 2b2b3df4431f1..3660627b8b578 100644 --- a/paddle/fluid/operators/tile_op_mlu.cc +++ b/paddle/fluid/operators/tile_op_mlu.cc @@ -18,8 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class TileMLUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/tile_op_npu.cc b/paddle/fluid/operators/tile_op_npu.cc index 2997052257d18..4ae1f6cbed330 100644 --- a/paddle/fluid/operators/tile_op_npu.cc +++ b/paddle/fluid/operators/tile_op_npu.cc @@ -18,7 +18,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index ab90fa78d3d45..f1674bc5005a0 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -30,8 +30,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - #define FIXED_BLOCK_DIM_BASE(dim, ...) \ case (dim): { \ constexpr auto kBlockDim = (dim); \ @@ -74,7 +72,7 @@ class TopkOpCUDAKernel : public framework::OpKernel { auto* k_t = ctx.Input("K"); if (k_t) { - Tensor k_host; + phi::DenseTensor k_host; framework::TensorCopySync(*k_t, platform::CPUPlace(), &k_host); k = k_host.data()[0]; framework::DDim output_dims = output->dims(); diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index cd29137d530f4..27f246415a94c 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -24,8 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class TopkKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/top_k_op_npu.cc b/paddle/fluid/operators/top_k_op_npu.cc index cbe5c224ae1d3..5b9b507989952 100644 --- a/paddle/fluid/operators/top_k_op_npu.cc +++ b/paddle/fluid/operators/top_k_op_npu.cc @@ -65,7 +65,7 @@ class TopkNPUKernel : public framework::OpKernel { {"dim", -1}, {"largest", true}}; - Tensor tmp_indices(experimental::DataType::INT32); + phi::DenseTensor tmp_indices(experimental::DataType::INT32); tmp_indices.Resize(indices->dims()); tmp_indices.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/top_k_op_xpu.cc b/paddle/fluid/operators/top_k_op_xpu.cc index 25f3faa38a0c5..df1725265ebde 100644 --- a/paddle/fluid/operators/top_k_op_xpu.cc +++ b/paddle/fluid/operators/top_k_op_xpu.cc @@ -23,7 +23,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; template class TopkXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h index ee37c2e9fe09b..cab0796a71019 100644 --- a/paddle/fluid/operators/tree_conv_op.h +++ b/paddle/fluid/operators/tree_conv_op.h @@ -22,7 +22,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using DDim = framework::DDim; template class TreeConvKernel : public framework::OpKernel { @@ -40,7 +39,7 @@ class TreeConvKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); auto blas = phi::funcs::GetBlas(dev_ctx); - Tensor W; + phi::DenseTensor W; W.ShareDataWith(*Filter); W.Resize(phi::flatten_to_2d(Filter->dims(), 2)); @@ -67,7 +66,7 @@ class TreeConvKernel : public framework::OpKernel { auto embeddings = Embeddings->Slice(idx, idx + 1).Resize(embedding_slicedim); auto out_vec = output_emb->Slice(idx, idx + 1).Resize(output_slicedim); - Tensor patch; + phi::DenseTensor patch; tree2col(dev_ctx, edge_set, embeddings, &patch, max_depth); constant(dev_ctx, &out_vec, 0); blas.MatMul(patch, W, &out_vec); @@ -93,7 +92,7 @@ class TreeConvGradKernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); auto blas = phi::funcs::GetBlas(dev_ctx); - Tensor W; + phi::DenseTensor W; W.ShareDataWith(*Filter); W.Resize(phi::flatten_to_2d(Filter->dims(), 1)); @@ -110,7 +109,7 @@ class TreeConvGradKernel : public framework::OpKernel { out_grad_dims = phi::flatten_to_2d(out_grad_dims, 1); if (filter_g) { filter_g->mutable_data(Filter->dims(), ctx.GetPlace()); - Tensor f_g; + phi::DenseTensor f_g; f_g.ShareDataWith(*filter_g); f_g.Resize(phi::flatten_to_2d(Filter->dims(), 2)); constant(dev_ctx, filter_g, 0); @@ -121,7 +120,7 @@ class TreeConvGradKernel : public framework::OpKernel { .Resize(embedding_slicedim); auto out_grad = out_g->Slice(batch_id, batch_id + 1).Resize(out_grad_dims); - Tensor patch; + phi::DenseTensor patch; tree2col(dev_ctx, edge_set, embeddings, &patch, max_depth); blas.MatMul(patch, true, out_grad, false, T(1.0), &f_g, T(1.0)); } @@ -138,7 +137,7 @@ class TreeConvGradKernel : public framework::OpKernel { out_g->Slice(batch_id, batch_id + 1).Resize(out_grad_dims); auto in_grad = in_g->Slice(batch_id, batch_id + 1).Resize(input_grad_dims); - Tensor in_grad_temp; + phi::DenseTensor in_grad_temp; col2tree(dev_ctx, edge_set, out_grad, &in_grad_temp, max_depth); blas.MatMul(in_grad_temp, false, W, true, &in_grad); } diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index b5e67ccb24a9a..c3b2e24892e40 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -22,37 +22,35 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class TruncatedGaussianRandomNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // TODO(zhiqiu): support dynamic shape and call ParameterizedTruncatedNormal std::vector shape = ctx.Attr>("shape"); - Tensor shape_tensor(experimental::DataType::INT32); + phi::DenseTensor shape_tensor(experimental::DataType::INT32); shape_tensor.mutable_data({static_cast(shape.size())}, ctx.GetPlace()); paddle::framework::TensorFromVector( shape, ctx.device_context(), &shape_tensor); float mean = ctx.Attr("mean"); - Tensor mean_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor mean_tensor(experimental::DataType::FLOAT32); mean_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&mean_tensor, mean); float std = ctx.Attr("std"); - Tensor std_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor std_tensor(experimental::DataType::FLOAT32); std_tensor.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&std_tensor, std); int32_t seed_var = ctx.Attr("seed"); - Tensor min_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor min_tensor(experimental::DataType::FLOAT32); min_tensor.mutable_data({1}, ctx.GetPlace()); float min_value = mean - std * 2.0; FillNpuTensorWithConstant(&min_tensor, min_value); - Tensor max_tensor(experimental::DataType::FLOAT32); + phi::DenseTensor max_tensor(experimental::DataType::FLOAT32); max_tensor.mutable_data({1}, ctx.GetPlace()); float max_value = mean + std * 2.0; FillNpuTensorWithConstant(&max_tensor, max_value); @@ -83,7 +81,7 @@ class NPUTruncatedGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - Tensor cpu_tensor(tensor->dtype()); + phi::DenseTensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); std::uniform_real_distribution dist(std::numeric_limits::min(), diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index 0da82f73028d9..7ba22baff99b9 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -145,7 +145,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensorList" || var_name == "ShapeTensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu index 8dd7a140ae914..4c60cb76fb9ea 100644 --- a/paddle/fluid/operators/uniform_random_op.cu +++ b/paddle/fluid/operators/uniform_random_op.cu @@ -46,7 +46,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) in uniform_random_op must be Tensor, " + "Expected type of Output(out) in uniform_random_op must be " + "phi::DenseTensor, " "SelectedRows. 
But got " "unsupport type: %s.", framework::ToTypeName(out_var->Type()))); diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 3ddf6092f04bf..05a643b33b215 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -30,7 +30,6 @@ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; inline std::vector GetNewDataFromShapeTensor( const phi::DenseTensor* new_data_tensor) { diff --git a/paddle/fluid/operators/uniform_random_op_mlu.cc b/paddle/fluid/operators/uniform_random_op_mlu.cc index 1f7f3e2f2bad3..8e5f61c831088 100644 --- a/paddle/fluid/operators/uniform_random_op_mlu.cc +++ b/paddle/fluid/operators/uniform_random_op_mlu.cc @@ -50,7 +50,8 @@ class MLUUniformRandomKernel : public framework::OpKernel { if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) in uniform_random_op must be Tensor, " + "Expected type of Output(out) in uniform_random_op must be " + "phi::DenseTensor, " "SelectedRows. But got " "unsupport type: %s.", framework::ToTypeName(out_var->Type()))); @@ -59,7 +60,7 @@ class MLUUniformRandomKernel : public framework::OpKernel { tensor->mutable_data(ctx.GetPlace()); int64_t size = tensor->numel(); - Tensor cpu_tensor(tensor->dtype()); + phi::DenseTensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T *data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/uniform_random_op_npu.cc b/paddle/fluid/operators/uniform_random_op_npu.cc index b1499b30fede7..e82c6e1f2a91a 100644 --- a/paddle/fluid/operators/uniform_random_op_npu.cc +++ b/paddle/fluid/operators/uniform_random_op_npu.cc @@ -52,7 +52,8 @@ class NPUUniformRandomKernel : public framework::OpKernel { if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "Expected type of Output(out) in uniform_random_op must be Tensor, " + "Expected type of Output(out) in uniform_random_op must be " + "phi::DenseTensor, " "SelectedRows. But got " "unsupport type: %s.", framework::ToTypeName(out_var->Type()))); @@ -60,7 +61,7 @@ class NPUUniformRandomKernel : public framework::OpKernel { tensor->mutable_data(ctx.GetPlace()); int64_t size = tensor->numel(); - Tensor cpu_tensor(tensor->dtype()); + phi::DenseTensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T *data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 35118ae64876c..b470874f26083 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -24,7 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoD = framework::LoD; void VarConv2dOpMaker::Make() { @@ -36,7 +35,7 @@ void VarConv2dOpMaker::Make() { "(phi::DenseTensor) the row variable provides lod information"); AddInput("COLUMN", "(phi::DenseTensor) the column variable provides lod information"); - AddInput("W", "W (Tensor), the filter."); + AddInput("W", "W (phi::DenseTensor), the filter."); AddAttr("InputChannel", "the input filter num").SetDefault(1); AddAttr("OutputChannel", "the output filter num").SetDefault(1); AddAttr("StrideH", "the height of Stride").SetDefault(1); @@ -130,11 +129,11 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* x_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); const auto& x_lod = x_var->Get().lod(); - PADDLE_ENFORCE_EQ( - !x_lod.empty(), - true, - platform::errors::InvalidArgument("The Input(X) Tensor of VarConv2dOP " - "does not contain LoD information.")); + PADDLE_ENFORCE_EQ(!x_lod.empty(), + true, + platform::errors::InvalidArgument( + "The Input(X) phi::DenseTensor of VarConv2dOP " + "does not contain LoD information.")); PADDLE_ENFORCE_GE(x_lod.size(), 1, @@ -151,20 +150,22 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* row_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("ROW")[0]); const auto& row_lod = row_var->Get().lod(); - PADDLE_ENFORCE_EQ(!row_lod.empty(), - true, - platform::errors::InvalidArgument( - "The Input(ROW) Tensor of VarConv2dOP does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + !row_lod.empty(), + true, + platform::errors::InvalidArgument( + "The Input(ROW) phi::DenseTensor of VarConv2dOP does not " + "contain LoD information.")); framework::Variable* col_var = PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("COLUMN")[0]); const auto& col_lod = col_var->Get().lod(); - PADDLE_ENFORCE_EQ(!col_lod.empty(), - true, - platform::errors::InvalidArgument( - "The Input(COLUMN) Tensor of VarConv2dOP does not " - "contain LoD information.")); + PADDLE_ENFORCE_EQ( + !col_lod.empty(), + true, + platform::errors::InvalidArgument( + "The Input(COLUMN) phi::DenseTensor of VarConv2dOP does not " + "contain LoD information.")); } else { std::vector out_dims_vec{-1}; out_dims_vec.push_back(1); @@ -468,7 +469,7 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel { auto* dx = ctx.Output(framework::GradVarName("X")); auto* d_w = ctx.Output(framework::GradVarName("W")); - Tensor col_grad; + phi::DenseTensor col_grad; col_grad.Resize(col->dims()); auto* col_diff = col_grad.mutable_data(ctx.GetPlace()); auto* dx_data = dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/var_conv_2d_op.h b/paddle/fluid/operators/var_conv_2d_op.h index 1a5fa9de2c7ce..cc0c97e671e8a 100644 --- a/paddle/fluid/operators/var_conv_2d_op.h +++ b/paddle/fluid/operators/var_conv_2d_op.h @@ -19,7 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; using LoD = framework::LoD; class VarConv2dOP : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/where_index_op_mlu.cc b/paddle/fluid/operators/where_index_op_mlu.cc index 85f463f723ef5..59ffb43f7ce5c 100644 --- a/paddle/fluid/operators/where_index_op_mlu.cc +++ b/paddle/fluid/operators/where_index_op_mlu.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class MLUWhereIndexKernel : public framework::OpKernel { public: @@ -31,7 +29,7 @@ class MLUWhereIndexKernel : public framework::OpKernel { auto dims = condition->dims(); const int rank = dims.size(); - Tensor num_true; + phi::DenseTensor num_true; num_true.mutable_data({1}, context.GetPlace()); MLUCnnlTensorDesc con_desc(*condition); MLUCnnlTensorDesc num_true_desc(num_true); @@ -41,7 +39,7 @@ class MLUWhereIndexKernel : public framework::OpKernel { num_true_desc.get(), GetBasePtr(&num_true)); - Tensor local_true_num; + phi::DenseTensor local_true_num; paddle::framework::TensorCopySync( num_true, platform::CPUPlace(), &local_true_num); auto true_num = *local_true_num.data(); diff --git a/paddle/fluid/operators/where_index_op_npu.cc b/paddle/fluid/operators/where_index_op_npu.cc index 5b006cbdcf1b0..d888513c2ebd2 100644 --- a/paddle/fluid/operators/where_index_op_npu.cc +++ b/paddle/fluid/operators/where_index_op_npu.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = phi::DenseTensor; - template class NPUWhereIndexKernel : public framework::OpKernel { public: @@ -39,7 +37,7 @@ class NPUWhereIndexKernel : public framework::OpKernel { const aclrtStream& stream = dev_ctx.stream(); // Run Cast and ReduceSum to get 0 dim of Out - Tensor booled_cond; + phi::DenseTensor booled_cond; if (framework::TransToProtoVarType(condition->dtype()) != framework::proto::VarType::BOOL) { auto bool_type = ConvertToNpuDtype(framework::proto::VarType::BOOL); @@ -53,7 +51,7 @@ class NPUWhereIndexKernel : public framework::OpKernel { } else { booled_cond.ShareDataWith(*condition); } - Tensor casted_cond; + phi::DenseTensor casted_cond; auto dst_dtype = ConvertToNpuDtype(framework::proto::VarType::INT64); casted_cond.mutable_data(dims, place); const auto& cast_runner = @@ -63,9 +61,9 @@ class NPUWhereIndexKernel : public framework::OpKernel { {{"dst_type", static_cast(dst_dtype)}}); cast_runner.Run(stream); - Tensor sumed_true_num; + phi::DenseTensor sumed_true_num; sumed_true_num.mutable_data({1}, place); - Tensor cond_axes; + phi::DenseTensor cond_axes; cond_axes.mutable_data({dims.size()}, place); std::vector axes_vec; for (int i = 0; i < dims.size(); ++i) { @@ -78,7 +76,7 @@ class NPUWhereIndexKernel : public framework::OpKernel { {{"keep_dims", false}}); sum_runner.Run(stream); - Tensor local_true_num; + phi::DenseTensor local_true_num; paddle::framework::TensorCopySync( sumed_true_num, platform::CPUPlace(), &local_true_num); auto true_num = *local_true_num.data();
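// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: every hunk above applies the
// same mechanical refactor -- the file-local alias `using Tensor =
// phi::DenseTensor;` is deleted and each use of `Tensor` is spelled out as
// `phi::DenseTensor`. The self-contained example below only shows the
// before/after shape of that rename with a stand-in type; the real
// phi::DenseTensor, operator kernels, and NPU/MLU runners are NOT reproduced,
// so all names here are hypothetical.
#include <cstdint>
#include <vector>

namespace phi {
// Minimal stand-in so the sketch compiles on its own; it is not the real API.
class DenseTensor {
 public:
  void Resize(const std::vector<int64_t>& dims) { dims_ = dims; }
  const std::vector<int64_t>& dims() const { return dims_; }

 private:
  std::vector<int64_t> dims_;
};
}  // namespace phi

namespace before_patch {
using Tensor = phi::DenseTensor;  // the alias this patch removes

Tensor MakeScalar() {  // old spelling through the alias
  Tensor t;
  t.Resize({1});
  return t;
}
}  // namespace before_patch

namespace after_patch {
phi::DenseTensor MakeScalar() {  // new spelling, fully qualified
  phi::DenseTensor t;
  t.Resize({1});
  return t;
}
}  // namespace after_patch

int main() {
  // Both spellings name the same type; only the source text changes.
  return after_patch::MakeScalar().dims().size() == 1 ? 0 : 1;
}
// ---------------------------------------------------------------------------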