@@ -820,6 +820,43 @@ static paddle::Tensor getValueForBoolTensor(const paddle::Tensor& tensor,
     indices_int64.push_back(indice);
   }
 
+  // AMP Logic
+  if (egr::Controller::Instance().GetAMPLevel() !=
+      paddle::imperative::AmpLevel::O0) {
+    auto op_name = phi::TransToFluidOpName("index_elementwise_get");
+    paddle::small_vector<std::vector<paddle::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        amp_tensors_vector = {{self_tensor}};
+
+    auto amp_dst_dtype =
+        paddle::imperative::GetAmpDestDtype(op_name, amp_tensors_vector);
+
+    auto new_self_tensor = paddle::imperative::AmpAutoCast(
+        "self_tensor", self_tensor, amp_dst_dtype, op_name);
+    auto new_tensor = paddle::imperative::AmpAutoCast(
+        "tensor", tensor, amp_dst_dtype, op_name);
+
+    {
+      paddle::imperative::AutoCastGuard guard(
+          egr::Controller::Instance().GetCurrentAmpAttrs(),
+          paddle::imperative::AmpLevel::O0);
+
+      AdvancedIndex ad = AdvancedIndex(new_tensor, indices_int64);
+      const bool is_combined = false;
+      const bool accumulate = false;
+
+      return index_elementwise_get_ad_func(new_self_tensor,
+                                           ad.indices,
+                                           ad.src_sizes,
+                                           ad.src_strides,
+                                           ad.indexed_sizes,
+                                           ad.indexed_strides,
+                                           slice_offset,
+                                           accumulate,
+                                           is_combined);
+    }
+  }
+
   AdvancedIndex ad = AdvancedIndex(tensor, indices_int64);
   const bool is_combined = false;
   const bool accumulate = false;
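A note for readers unfamiliar with this pattern: each input is cast once with `AmpAutoCast` at the op boundary, and the RAII `AutoCastGuard` then pins the AMP level to O0 so that nothing inside the scope, including `index_elementwise_get_ad_func`, re-casts the already-converted tensors. Below is a minimal standalone sketch of that guard pattern; `AmpLevel`, `AutoCastGuard`, `AmpAutoCast`, and `Kernel` here are simplified mocks, not Paddle's real types.

```cpp
#include <iostream>
#include <string>

// Simplified stand-in for paddle::imperative::AmpLevel.
enum class AmpLevel { O0, O1 };  // O0 = autocast off, O1 = mixed precision

thread_local AmpLevel amp_level = AmpLevel::O1;

// RAII guard: saves the current level, sets a new one, restores on scope exit.
struct AutoCastGuard {
  AmpLevel saved;
  explicit AutoCastGuard(AmpLevel level) : saved(amp_level) {
    amp_level = level;
  }
  ~AutoCastGuard() { amp_level = saved; }
};

// Mock of AmpAutoCast: casts only while autocast is active.
std::string AmpAutoCast(const std::string& name, const std::string& dtype,
                        const std::string& dst_dtype) {
  if (amp_level != AmpLevel::O0 && dtype != dst_dtype) {
    std::cout << "cast " << name << ": " << dtype << " -> " << dst_dtype
              << "\n";
    return dst_dtype;
  }
  return dtype;
}

// Inside the guard amp_level is O0, so this nested autocast is a no-op
// and the already-cast input passes through unchanged.
std::string Kernel(const std::string& dtype) {
  return AmpAutoCast("kernel_input", dtype, "float32");
}

int main() {
  std::string x_dtype = "float32";
  if (amp_level != AmpLevel::O0) {
    auto cast_dtype = AmpAutoCast("x", x_dtype, "float16");  // cast once
    AutoCastGuard guard(AmpLevel::O0);  // disable autocast inside the scope
    std::cout << "kernel ran in " << Kernel(cast_dtype) << "\n";
  }
  return 0;
}
```

Running the sketch prints one cast at the boundary and none inside `Kernel`, which is exactly what the guard buys in the hunk above.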
@@ -1287,6 +1324,45 @@ static void ApplyGetitem(const int index_size,
                      transed_tensor,
                      &transed_index_int64);
 
+  // AMP Logic
+  if (egr::Controller::Instance().GetAMPLevel() !=
+      paddle::imperative::AmpLevel::O0) {
+    auto op_name = phi::TransToFluidOpName("index_elementwise_get");
+    paddle::small_vector<std::vector<paddle::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        amp_tensors_vector = {{*self_tensor}};
+
+    auto amp_dst_dtype =
+        paddle::imperative::GetAmpDestDtype(op_name, amp_tensors_vector);
+
+    auto new_self_tensor = paddle::imperative::AmpAutoCast(
+        "self_tensor", *self_tensor, amp_dst_dtype, op_name);
+    auto new_transed_tensor = paddle::imperative::AmpAutoCast(
+        "transed_tensor", *transed_tensor, amp_dst_dtype, op_name);
+
+    {
+      paddle::imperative::AutoCastGuard guard(
+          egr::Controller::Instance().GetCurrentAmpAttrs(),
+          paddle::imperative::AmpLevel::O0);
+
+      AdvancedIndex ad =
+          AdvancedIndex(new_transed_tensor, transed_index_int64);
+
+      const bool is_combined = (index_size == 1) ? false : true;
+      const bool accumulate = true;
+      *out = index_elementwise_get_ad_func(new_self_tensor,
+                                           ad.indices,
+                                           ad.src_sizes,
+                                           ad.src_strides,
+                                           ad.indexed_sizes,
+                                           ad.indexed_strides,
+                                           slice_offset,
+                                           accumulate,
+                                           is_combined);
+    }
+    return;
+  }
+
   AdvancedIndex ad = AdvancedIndex(*transed_tensor, transed_index_int64);
   // is_combined:
   // Distinguishes between regular indexing (single index) and combined
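Both hunks take the destination dtype from `GetAmpDestDtype(op_name, amp_tensors_vector)`, which consults the op name and the collected input tensors. As a rough sketch of how such a helper can decide (a hypothetical policy for illustration only, not Paddle's actual allow/block lists or implementation):

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Hypothetical stand-in for GetAmpDestDtype: ops on an allow-list may run
// in float16; unknown or block-listed ops stay in float32. The lists and
// fallback policy here are invented for illustration.
std::string GetAmpDestDtypeMock(const std::string& op_name,
                                const std::vector<std::string>& input_dtypes) {
  static const std::set<std::string> allow_list = {"matmul", "conv2d"};
  if (allow_list.count(op_name) == 0) {
    return "float32";  // conservative fallback for numerical safety
  }
  for (const auto& dtype : input_dtypes) {
    if (dtype == "float64") {
      return "float64";  // never down-cast double-precision inputs
    }
  }
  return "float16";
}

int main() {
  std::cout << GetAmpDestDtypeMock("matmul", {"float16", "float32"}) << "\n";
  std::cout << GetAmpDestDtypeMock("index_elementwise_get", {"float16"})
            << "\n";
  return 0;
}
```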