diff --git a/expression/schema.go b/expression/schema.go
index 1ca8740de5eea..6207b1d9fd1ae 100644
--- a/expression/schema.go
+++ b/expression/schema.go
@@ -30,6 +30,15 @@ func (ki KeyInfo) Clone() KeyInfo {
 	return result
 }
 
+// String implements fmt.Stringer interface.
+func (ki KeyInfo) String() string {
+	ukColStrs := make([]string, 0, len(ki))
+	for _, col := range ki {
+		ukColStrs = append(ukColStrs, col.String())
+	}
+	return "[" + strings.Join(ukColStrs, ",") + "]"
+}
+
 // Schema stands for the row schema and unique key information get from input.
 type Schema struct {
 	Columns []*Column
@@ -47,11 +56,7 @@ func (s *Schema) String() string {
 	}
 	ukStrs := make([]string, 0, len(s.Keys))
 	for _, key := range s.Keys {
-		ukColStrs := make([]string, 0, len(key))
-		for _, col := range key {
-			ukColStrs = append(ukColStrs, col.String())
-		}
-		ukStrs = append(ukStrs, "["+strings.Join(ukColStrs, ",")+"]")
+		ukStrs = append(ukStrs, key.String())
 	}
 	return "Column: [" + strings.Join(colStrs, ",") + "] Unique key: [" + strings.Join(ukStrs, ",") + "]"
 }
diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go
index b662db288d3b1..a0bb8a8070726 100644
--- a/planner/core/logical_plan_test.go
+++ b/planner/core/logical_plan_test.go
@@ -2071,58 +2071,3 @@ func (s *testPlanSuite) TestWindowLogicalPlanAmbiguous(c *C) {
 		}
 	}
 }
-
-func (s *testPlanSuite) TestLogicalOptimizeWithTraceEnabled(c *C) {
-	sql := "select * from t where a in (1,2)"
-	defer testleak.AfterTest(c)()
-	tt := []struct {
-		flags []uint64
-		steps int
-	}{
-		{
-			flags: []uint64{
-				flagEliminateAgg,
-				flagPushDownAgg},
-			steps: 2,
-		},
-		{
-			flags: []uint64{
-				flagEliminateAgg,
-				flagPushDownAgg,
-				flagPrunColumns,
-				flagBuildKeyInfo,
-			},
-			steps: 4,
-		},
-		{
-			flags: []uint64{},
-			steps: 0,
-		},
-	}
-
-	for i, tc := range tt {
-		comment := Commentf("case:%v sql:%s", i, sql)
-		stmt, err := s.ParseOneStmt(sql, "", "")
-		c.Assert(err, IsNil, comment)
-		err = Preprocess(s.ctx, stmt, WithPreprocessorReturn(&PreprocessorReturn{InfoSchema: s.is}))
-		c.Assert(err, IsNil, comment)
-		sctx := MockContext()
-		sctx.GetSessionVars().EnableStmtOptimizeTrace = true
-		builder, _ := NewPlanBuilder().Init(sctx, s.is, &hint.BlockHintProcessor{})
-		domain.GetDomain(sctx).MockInfoCacheAndLoadInfoSchema(s.is)
-		ctx := context.TODO()
-		p, err := builder.Build(ctx, stmt)
-		c.Assert(err, IsNil)
-		flag := uint64(0)
-		for _, f := range tc.flags {
-			flag = flag | f
-		}
-		p, err = logicalOptimize(ctx, flag, p.(LogicalPlan))
-		c.Assert(err, IsNil)
-		_, ok := p.(*LogicalProjection)
-		c.Assert(ok, IsTrue)
-		otrace := sctx.GetSessionVars().StmtCtx.LogicalOptimizeTrace
-		c.Assert(otrace, NotNil)
-		c.Assert(len(otrace.Steps), Equals, tc.steps)
-	}
-}
diff --git a/planner/core/logical_plan_trace_test.go b/planner/core/logical_plan_trace_test.go
new file mode 100644
index 0000000000000..cce082cd81e42
--- /dev/null
+++ b/planner/core/logical_plan_trace_test.go
@@ -0,0 +1,147 @@
+// Copyright 2021 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package core
+
+import (
+	"context"
+
+	. "github.com/pingcap/check"
+	"github.com/pingcap/tidb/domain"
+	"github.com/pingcap/tidb/util/hint"
+	"github.com/pingcap/tidb/util/testleak"
+)
+
+func (s *testPlanSuite) TestLogicalOptimizeWithTraceEnabled(c *C) {
+	sql := "select * from t where a in (1,2)"
+	defer testleak.AfterTest(c)()
+	tt := []struct {
+		flags []uint64
+		steps int
+	}{
+		{
+			flags: []uint64{
+				flagEliminateAgg,
+				flagPushDownAgg},
+			steps: 2,
+		},
+		{
+			flags: []uint64{
+				flagEliminateAgg,
+				flagPushDownAgg,
+				flagPrunColumns,
+				flagBuildKeyInfo,
+			},
+			steps: 4,
+		},
+		{
+			flags: []uint64{},
+			steps: 0,
+		},
+	}
+
+	for i, tc := range tt {
+		comment := Commentf("case:%v sql:%s", i, sql)
+		stmt, err := s.ParseOneStmt(sql, "", "")
+		c.Assert(err, IsNil, comment)
+		err = Preprocess(s.ctx, stmt, WithPreprocessorReturn(&PreprocessorReturn{InfoSchema: s.is}))
+		c.Assert(err, IsNil, comment)
+		sctx := MockContext()
+		sctx.GetSessionVars().EnableStmtOptimizeTrace = true
+		builder, _ := NewPlanBuilder().Init(sctx, s.is, &hint.BlockHintProcessor{})
+		domain.GetDomain(sctx).MockInfoCacheAndLoadInfoSchema(s.is)
+		ctx := context.TODO()
+		p, err := builder.Build(ctx, stmt)
+		c.Assert(err, IsNil)
+		flag := uint64(0)
+		for _, f := range tc.flags {
+			flag = flag | f
+		}
+		p, err = logicalOptimize(ctx, flag, p.(LogicalPlan))
+		c.Assert(err, IsNil)
+		_, ok := p.(*LogicalProjection)
+		c.Assert(ok, IsTrue)
+		otrace := sctx.GetSessionVars().StmtCtx.LogicalOptimizeTrace
+		c.Assert(otrace, NotNil)
+		c.Assert(len(otrace.Steps), Equals, tc.steps)
+	}
+}
+
+func (s *testPlanSuite) TestSingleRuleTraceStep(c *C) {
+	defer testleak.AfterTest(c)()
+	tt := []struct {
+		sql             string
+		flags           []uint64
+		assertRuleName  string
+		assertRuleSteps []assertTraceStep
+	}{
+		{
+			sql:            "select min(distinct a) from t group by a",
+			flags:          []uint64{flagBuildKeyInfo, flagEliminateAgg},
+			assertRuleName: "aggregation_eliminate",
+			assertRuleSteps: []assertTraceStep{
+				{
+					assertReason: "[test.t.a] is a unique key",
+					assertAction: "min(distinct ...) is simplified to min(...)",
+				},
+				{
+					assertReason: "[test.t.a] is a unique key",
+					assertAction: "aggregation is simplified to a projection",
+				},
+			},
+		},
+	}
+
+	for i, tc := range tt {
+		sql := tc.sql
+		comment := Commentf("case:%v sql:%s", i, sql)
+		stmt, err := s.ParseOneStmt(sql, "", "")
+		c.Assert(err, IsNil, comment)
+		err = Preprocess(s.ctx, stmt, WithPreprocessorReturn(&PreprocessorReturn{InfoSchema: s.is}))
+		c.Assert(err, IsNil, comment)
+		sctx := MockContext()
+		sctx.GetSessionVars().EnableStmtOptimizeTrace = true
+		builder, _ := NewPlanBuilder().Init(sctx, s.is, &hint.BlockHintProcessor{})
+		domain.GetDomain(sctx).MockInfoCacheAndLoadInfoSchema(s.is)
+		ctx := context.TODO()
+		p, err := builder.Build(ctx, stmt)
+		c.Assert(err, IsNil)
+		flag := uint64(0)
+		for _, f := range tc.flags {
+			flag = flag | f
+		}
+		p, err = logicalOptimize(ctx, flag, p.(LogicalPlan))
+		c.Assert(err, IsNil)
+		_, ok := p.(*LogicalProjection)
+		c.Assert(ok, IsTrue)
+		otrace := sctx.GetSessionVars().StmtCtx.LogicalOptimizeTrace
+		c.Assert(otrace, NotNil)
+		assert := false
+		for _, step := range otrace.Steps {
+			if step.RuleName == tc.assertRuleName {
+				assert = true
+				for i, ruleStep := range step.Steps {
+					c.Assert(ruleStep.Action, Equals, tc.assertRuleSteps[i].assertAction)
+					c.Assert(ruleStep.Reason, Equals, tc.assertRuleSteps[i].assertReason)
+				}
+			}
+		}
+		c.Assert(assert, IsTrue)
+	}
+}
+
+type assertTraceStep struct {
+	assertReason string
+	assertAction string
+}
diff --git a/planner/core/optimizer.go b/planner/core/optimizer.go
index f800973cd2f73..ed7e9be5c61b5 100644
--- a/planner/core/optimizer.go
+++ b/planner/core/optimizer.go
@@ -102,7 +102,7 @@ func (op *logicalOptimizeOp) appendBeforeRuleOptimize(index int, name string, be
 	if op.tracer == nil {
 		return
 	}
-	op.tracer.AppendRuleTracerBeforeRuleOptimize(index, name, before.buildLogicalPlanTrace())
+	op.tracer.AppendRuleTracerBeforeRuleOptimize(index, name, before.buildLogicalPlanTrace(before))
 }
 
 func (op *logicalOptimizeOp) appendStepToCurrent(id int, tp, reason, action string) {
@@ -116,7 +116,7 @@ func (op *logicalOptimizeOp) trackAfterRuleOptimize(after LogicalPlan) {
 	if op.tracer == nil {
 		return
 	}
-	op.tracer.TrackLogicalPlanAfterRuleOptimize(after.buildLogicalPlanTrace())
+	op.tracer.TrackLogicalPlanAfterRuleOptimize(after.buildLogicalPlanTrace(after))
 }
 
 // logicalOptRule means a logical optimizing rule, which contains decorrelate, ppd, column pruning, etc.
diff --git a/planner/core/plan.go b/planner/core/plan.go
index 499662b02491c..3515f44e91750 100644
--- a/planner/core/plan.go
+++ b/planner/core/plan.go
@@ -308,7 +308,7 @@ type LogicalPlan interface {
 	canPushToCop(store kv.StoreType) bool
 
 	// buildLogicalPlanTrace clone necessary information from LogicalPlan
-	buildLogicalPlanTrace() *tracing.LogicalPlanTrace
+	buildLogicalPlanTrace(p Plan) *tracing.LogicalPlanTrace
 }
 
 // PhysicalPlan is a tree of the physical operators.
@@ -382,10 +382,10 @@ func (p *baseLogicalPlan) ExplainInfo() string {
 }
 
 // buildLogicalPlanTrace implements LogicalPlan
-func (p *baseLogicalPlan) buildLogicalPlanTrace() *tracing.LogicalPlanTrace {
-	planTrace := &tracing.LogicalPlanTrace{ID: p.ID(), TP: p.TP()}
+func (p *baseLogicalPlan) buildLogicalPlanTrace(plan Plan) *tracing.LogicalPlanTrace {
+	planTrace := &tracing.LogicalPlanTrace{ID: p.ID(), TP: p.TP(), ExplainInfo: plan.ExplainInfo()}
 	for _, child := range p.Children() {
-		planTrace.Children = append(planTrace.Children, child.buildLogicalPlanTrace())
+		planTrace.Children = append(planTrace.Children, child.buildLogicalPlanTrace(child))
 	}
 	return planTrace
 }
diff --git a/planner/core/rule_aggregation_elimination.go b/planner/core/rule_aggregation_elimination.go
index 0ed5e4f8f0276..61d9e0f117e0d 100644
--- a/planner/core/rule_aggregation_elimination.go
+++ b/planner/core/rule_aggregation_elimination.go
@@ -16,6 +16,7 @@ package core
 
 import (
 	"context"
+	"fmt"
 	"math"
 
 	"github.com/pingcap/tidb/expression"
@@ -37,7 +38,7 @@ type aggregationEliminateChecker struct {
 // e.g. select min(b) from t group by a. If a is a unique key, then this sql is equal to `select b from t group by a`.
 // For count(expr), sum(expr), avg(expr), count(distinct expr, [expr...]) we may need to rewrite the expr. Details are shown below.
 // If we can eliminate agg successful, we return a projection. Else we return a nil pointer.
-func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggregation) *LogicalProjection {
+func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggregation, opt *logicalOptimizeOp) *LogicalProjection {
 	for _, af := range agg.AggFuncs {
 		// TODO(issue #9968): Actually, we can rewrite GROUP_CONCAT when all the
 		// arguments it accepts are promised to be NOT-NULL.
@@ -54,9 +55,11 @@ func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggr
 	}
 	schemaByGroupby := expression.NewSchema(agg.GetGroupByCols()...)
 	coveredByUniqueKey := false
+	var uniqueKey expression.KeyInfo
 	for _, key := range agg.children[0].Schema().Keys {
 		if schemaByGroupby.ColumnsIndices(key) != nil {
 			coveredByUniqueKey = true
+			uniqueKey = key
 			break
 		}
 	}
@@ -64,12 +67,15 @@ func (a *aggregationEliminateChecker) tryToEliminateAggregation(agg *LogicalAggr
 		// GroupByCols has unique key, so this aggregation can be removed.
 		if ok, proj := ConvertAggToProj(agg, agg.schema); ok {
 			proj.SetChildren(agg.children[0])
+			appendAggregationEliminateTraceStep(agg, uniqueKey, opt)
 			return proj
 		}
 	}
 	return nil
 }
 
+// tryToEliminateDistinct will eliminate distinct in the aggregation function if the aggregation args
+// have unique key column. see detail example in https://github.com/pingcap/tidb/issues/23436
 func (a *aggregationEliminateChecker) tryToEliminateDistinct(agg *LogicalAggregation, opt *logicalOptimizeOp) {
 	for _, af := range agg.AggFuncs {
 		if af.HasDistinct {
@@ -86,28 +92,43 @@ func (a *aggregationEliminateChecker) tryToEliminateDistinct(agg *LogicalAggrega
 			if canEliminate {
 				distinctByUniqueKey := false
 				schemaByDistinct := expression.NewSchema(cols...)
+				var uniqueKey expression.KeyInfo
 				for _, key := range agg.children[0].Schema().Keys {
 					if schemaByDistinct.ColumnsIndices(key) != nil {
 						distinctByUniqueKey = true
+						uniqueKey = key
 						break
 					}
 				}
 				for _, key := range agg.children[0].Schema().UniqueKeys {
 					if schemaByDistinct.ColumnsIndices(key) != nil {
 						distinctByUniqueKey = true
+						uniqueKey = key
 						break
 					}
 				}
 				if distinctByUniqueKey {
 					af.HasDistinct = false
-					// TODO: fulfill in future pr
-					opt.appendStepToCurrent(agg.ID(), agg.TP(), "", "")
+					appendDistinctEliminateTraceStep(agg, uniqueKey, af, opt)
 				}
 			}
 		}
 	}
 }
 
+func appendAggregationEliminateTraceStep(agg *LogicalAggregation, uniqueKey expression.KeyInfo, opt *logicalOptimizeOp) {
+	opt.appendStepToCurrent(agg.ID(), agg.TP(),
+		fmt.Sprintf("%s is a unique key", uniqueKey.String()),
+		"aggregation is simplified to a projection")
+}
+
+func appendDistinctEliminateTraceStep(agg *LogicalAggregation, uniqueKey expression.KeyInfo, af *aggregation.AggFuncDesc,
+	opt *logicalOptimizeOp) {
+	opt.appendStepToCurrent(agg.ID(), agg.TP(),
+		fmt.Sprintf("%s is a unique key", uniqueKey.String()),
+		fmt.Sprintf("%s(distinct ...) is simplified to %s(...)", af.Name, af.Name))
+}
+
 // ConvertAggToProj convert aggregation to projection.
 func ConvertAggToProj(agg *LogicalAggregation, schema *expression.Schema) (bool, *LogicalProjection) {
 	proj := LogicalProjection{
@@ -196,7 +217,7 @@ func (a *aggregationEliminator) optimize(ctx context.Context, p LogicalPlan, opt
 		return p, nil
 	}
 	a.tryToEliminateDistinct(agg, opt)
-	if proj := a.tryToEliminateAggregation(agg); proj != nil {
+	if proj := a.tryToEliminateAggregation(agg, opt); proj != nil {
 		return proj, nil
 	}
 	return p, nil
diff --git a/planner/core/rule_aggregation_push_down.go b/planner/core/rule_aggregation_push_down.go
index 1acf8179f8a45..9609432d9f195 100644
--- a/planner/core/rule_aggregation_push_down.go
+++ b/planner/core/rule_aggregation_push_down.go
@@ -368,7 +368,7 @@ func (a *aggregationPushDownSolver) pushAggCrossUnion(agg *LogicalAggregation, u
 }
 
 func (a *aggregationPushDownSolver) optimize(ctx context.Context, p LogicalPlan, opt *logicalOptimizeOp) (LogicalPlan, error) {
-	return a.aggPushDown(p)
+	return a.aggPushDown(p, opt)
 }
 
 func (a *aggregationPushDownSolver) tryAggPushDownForUnion(union *LogicalUnionAll, agg *LogicalAggregation) error {
@@ -395,9 +395,9 @@ func (a *aggregationPushDownSolver) tryAggPushDownForUnion(union *LogicalUnionAl
 }
 
 // aggPushDown tries to push down aggregate functions to join paths.
-func (a *aggregationPushDownSolver) aggPushDown(p LogicalPlan) (_ LogicalPlan, err error) {
+func (a *aggregationPushDownSolver) aggPushDown(p LogicalPlan, opt *logicalOptimizeOp) (_ LogicalPlan, err error) {
 	if agg, ok := p.(*LogicalAggregation); ok {
-		proj := a.tryToEliminateAggregation(agg)
+		proj := a.tryToEliminateAggregation(agg, opt)
 		if proj != nil {
 			p = proj
 		} else {
@@ -428,7 +428,7 @@ func (a *aggregationPushDownSolver) aggPushDown(p LogicalPlan) (_ LogicalPlan, e
 					join.SetChildren(lChild, rChild)
 					join.SetSchema(expression.MergeSchema(lChild.Schema(), rChild.Schema()))
 					buildKeyInfo(join)
-					proj := a.tryToEliminateAggregation(agg)
+					proj := a.tryToEliminateAggregation(agg, opt)
 					if proj != nil {
 						p = proj
 					}
@@ -487,7 +487,7 @@ func (a *aggregationPushDownSolver) aggPushDown(p LogicalPlan) (_ LogicalPlan, e
 	}
 	newChildren := make([]LogicalPlan, 0, len(p.Children()))
 	for _, child := range p.Children() {
-		newChild, err := a.aggPushDown(child)
+		newChild, err := a.aggPushDown(child, opt)
 		if err != nil {
 			return nil, err
 		}
diff --git a/util/tracing/opt_trace.go b/util/tracing/opt_trace.go
index 1e401d59b57f1..a466016b3ee82 100644
--- a/util/tracing/opt_trace.go
+++ b/util/tracing/opt_trace.go
@@ -40,11 +40,13 @@ func (tracer *LogicalOptimizeTracer) AppendRuleTracerBeforeRuleOptimize(index in
 
 // AppendRuleTracerStepToCurrent add rule optimize step to current
 func (tracer *LogicalOptimizeTracer) AppendRuleTracerStepToCurrent(id int, tp, reason, action string) {
+	index := len(tracer.curRuleTracer.Steps)
 	tracer.curRuleTracer.Steps = append(tracer.curRuleTracer.Steps, LogicalRuleOptimizeTraceStep{
 		ID:     id,
 		TP:     tp,
 		Reason: reason,
 		Action: action,
+		Index:  index,
 	})
 }
 
@@ -80,6 +82,7 @@ type LogicalRuleOptimizeTraceStep struct {
 	Reason string `json:"reason"`
 	ID     int    `json:"id"`
 	TP     string `json:"type"`
+	Index  int    `json:"index"`
 }
 
 // CETraceRecord records an expression and related cardinality estimation result.