Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dxf: refactor failpoints on task executor #57680

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pkg/disttask/framework/integrationtests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ go_test(
],
flaky = True,
race = "off",
shard_count = 23,
shard_count = 22,
deps = [
"//pkg/config",
"//pkg/ddl",
"//pkg/disttask/framework/handle",
"//pkg/disttask/framework/mock/execute",
"//pkg/disttask/framework/proto",
"//pkg/disttask/framework/scheduler",
"//pkg/disttask/framework/scheduler/mock",
Expand All @@ -34,6 +35,7 @@ go_test(
"//pkg/testkit/testfailpoint",
"//pkg/testkit/testsetup",
"//pkg/util",
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_stretchr_testify//require",
"@io_opencensus_go//stats/view",
Expand Down
2 changes: 1 addition & 1 deletion pkg/disttask/framework/integrationtests/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func registerTaskTypeForBench(c *testutil.TestDXFContext) {
).AnyTimes()
schedulerExt.EXPECT().OnDone(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()

testutil.RegisterTaskMetaWithDXFCtx(c, schedulerExt, func(ctx context.Context, subtask *proto.Subtask) error {
registerExampleTaskWithDXFCtx(c, schedulerExt, func(ctx context.Context, subtask *proto.Subtask) error {
select {
case <-ctx.Done():
taskManager, err := storage.GetTaskManager()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ import (

func TestRetryErrOnNextSubtasksBatch(t *testing.T) {
c := testutil.NewTestDXFContext(t, 2, 16, true)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetPlanErrSchedulerExt(c.MockCtrl, c.TestContext), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetPlanErrSchedulerExt(c.MockCtrl, c.TestContext), c.TestContext, nil)
submitTaskAndCheckSuccessForBasic(c.Ctx, t, "key1", c.TestContext)
}

func TestPlanNotRetryableOnNextSubtasksBatchErr(t *testing.T) {
c := testutil.NewTestDXFContext(t, 2, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetPlanNotRetryableErrSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetPlanNotRetryableErrSchedulerExt(c.MockCtrl), c.TestContext, nil)
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetStepTwoPlanNotRetryableErrSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetStepTwoPlanNotRetryableErrSchedulerExt(c.MockCtrl), c.TestContext, nil)
task = testutil.SubmitAndWaitTask(c.Ctx, t, "key2", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
}
6 changes: 3 additions & 3 deletions pkg/disttask/framework/integrationtests/framework_ha_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func submitTaskAndCheckSuccessForHA(ctx context.Context, t *testing.T, taskKey s
func TestHANodeRandomShutdown(t *testing.T) {
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/mockTiDBShutdown", "return()")
c := testutil.NewDXFContextWithRandomNodes(t, 4, 15)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)

// we keep [1, 10] nodes running, as we only have 10 subtask at stepOne
keepCount := int(math.Min(float64(c.NodeCount()-1), float64(c.Rand.Intn(10)+1)))
Expand All @@ -58,7 +58,7 @@ func TestHARandomShutdownInDifferentStep(t *testing.T) {
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/mockTiDBShutdown", "return()")
c := testutil.NewDXFContextWithRandomNodes(t, 6, 15)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)
// they might overlap, but will leave at least 2 nodes running
nodeNeedDownAtStepOne := c.GetRandNodeIDs(c.NodeCount()/2 - 1)
nodeNeedDownAtStepTwo := c.GetRandNodeIDs(c.NodeCount()/2 - 1)
Expand Down Expand Up @@ -92,7 +92,7 @@ func TestHAMultipleOwner(t *testing.T) {
}
require.Equal(t, prevCount+additionalOwnerCnt, c.NodeCount())

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockHATestSchedulerExt(c.MockCtrl), c.TestContext, nil)
var wg util.WaitGroupWrapper
for i := 0; i < 10; i++ {
taskKey := fmt.Sprintf("key%d", i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func CheckSubtasksState(ctx context.Context, t *testing.T, taskID int64, state p
func TestFrameworkPauseAndResume(t *testing.T) {
c := testutil.NewTestDXFContext(t, 3, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
// 1. schedule and pause one running task.
var counter atomic.Int32
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/scheduler/beforeRefreshTask", func(task *proto.Task) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (

func TestFrameworkRollback(t *testing.T) {
c := testutil.NewTestDXFContext(t, 2, 16, true)
testutil.RegisterRollbackTaskMeta(t, c.MockCtrl, testutil.GetMockRollbackSchedulerExt(c.MockCtrl), c.TestContext)
testutil.RegisterTaskTypeForRollback(t, c.MockCtrl, testutil.GetMockRollbackSchedulerExt(c.MockCtrl), c.TestContext)
var counter atomic.Int32
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/scheduler/afterRefreshTask",
func(task *proto.Task) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ func TestScopeBasic(t *testing.T) {
nodeCnt := 3
c := testutil.NewTestDXFContext(t, nodeCnt, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, getMockBasicSchedulerExtForScope(c.MockCtrl, 3), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, getMockBasicSchedulerExtForScope(c.MockCtrl, 3), c.TestContext, nil)
tk := testkit.NewTestKit(t, c.Store)

// 1. all "" role.
Expand Down Expand Up @@ -178,7 +178,7 @@ func runTargetScopeCase(t *testing.T, c *testutil.TestDXFContext, tk *testkit.Te
func TestTargetScope(t *testing.T) {
nodeCnt := 10
c := testutil.NewTestDXFContext(t, nodeCnt, 16, true)
testutil.RegisterTaskMeta(t, c.MockCtrl, getMockBasicSchedulerExtForScope(c.MockCtrl, nodeCnt), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, getMockBasicSchedulerExtForScope(c.MockCtrl, nodeCnt), c.TestContext, nil)
tk := testkit.NewTestKit(t, c.Store)
caseNum := 10
for i := 0; i < caseNum; i++ {
Expand Down
114 changes: 86 additions & 28 deletions pkg/disttask/framework/integrationtests/framework_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,46 @@ import (
"testing"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
mockexecute "github.com/pingcap/tidb/pkg/disttask/framework/mock/execute"
"github.com/pingcap/tidb/pkg/disttask/framework/proto"
"github.com/pingcap/tidb/pkg/disttask/framework/scheduler"
"github.com/pingcap/tidb/pkg/disttask/framework/storage"
"github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor"
"github.com/pingcap/tidb/pkg/disttask/framework/testutil"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/pkg/testkit/testfailpoint"
"github.com/pingcap/tidb/pkg/util"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
)

func registerExampleTaskWithDXFCtx(c *testutil.TestDXFContext, schedulerExt scheduler.Extension, runSubtaskFn func(ctx context.Context, subtask *proto.Subtask) error) {
registerExampleTask(c.T, c.MockCtrl, schedulerExt, c.TestContext, runSubtaskFn)
}

func registerExampleTask(t testing.TB, ctrl *gomock.Controller, schedulerExt scheduler.Extension, testContext *testutil.TestContext, runSubtaskFn func(ctx context.Context, subtask *proto.Subtask) error) {
if runSubtaskFn == nil {
runSubtaskFn = getCommonSubtaskRunFn(testContext)
}
stepExecutor := testutil.GetCommonStepExecutor(ctrl, runSubtaskFn)
executorExt := testutil.GetCommonTaskExecutorExt(ctrl, stepExecutor)
testutil.RegisterExampleTask(t, schedulerExt, executorExt, testutil.GetCommonCleanUpRoutine(ctrl))
}

func getCommonSubtaskRunFn(testCtx *testutil.TestContext) func(_ context.Context, subtask *proto.Subtask) error {
return func(_ context.Context, subtask *proto.Subtask) error {
switch subtask.Step {
case proto.StepOne, proto.StepTwo:
testCtx.CollectSubtask(subtask)
default:
panic("invalid step")
}
return nil
}
}

func submitTaskAndCheckSuccessForBasic(ctx context.Context, t *testing.T, taskKey string, testContext *testutil.TestContext) int64 {
return submitTaskAndCheckSuccess(ctx, t, taskKey, "", testContext, map[proto.Step]int{
proto.StepOne: 3,
Expand All @@ -53,7 +82,7 @@ func submitTaskAndCheckSuccess(ctx context.Context, t *testing.T, taskKey string
func TestRandomOwnerChangeWithMultipleTasks(t *testing.T) {
c := testutil.NewTestDXFContext(t, 5, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var wg util.WaitGroupWrapper
for i := 0; i < 10; i++ {
taskKey := fmt.Sprintf("key%d", i)
Expand All @@ -79,7 +108,7 @@ func TestFrameworkScaleInAndOut(t *testing.T) {
t.Logf("seed: %d", seed)
random := rand.New(rand.NewSource(seed))

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var wg util.WaitGroupWrapper
for i := 0; i < 12; i++ {
taskKey := fmt.Sprintf("key%d", i)
Expand All @@ -103,7 +132,7 @@ func TestFrameworkScaleInAndOut(t *testing.T) {
func TestFrameworkWithQuery(t *testing.T) {
c := testutil.NewTestDXFContext(t, 2, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var wg util.WaitGroupWrapper
wg.Run(func() {
submitTaskAndCheckSuccessForBasic(c.Ctx, t, "key1", c.TestContext)
Expand All @@ -127,33 +156,33 @@ func TestFrameworkWithQuery(t *testing.T) {
func TestFrameworkCancelTask(t *testing.T) {
c := testutil.NewTestDXFContext(t, 2, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)

testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/MockExecutorRunCancel", "1*return(1)")
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
}

func TestFrameworkSubTaskFailed(t *testing.T) {
c := testutil.NewTestDXFContext(t, 1, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/MockExecutorRunErr", "1*return(true)")
Comment on lines -137 to -141
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this one is merge into TestFrameworkRunSubtaskCancelOrFailed

registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var once sync.Once
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/beforeCallOnSubtaskFinished",
func(subtask *proto.Subtask) {
once.Do(func() {
require.NoError(t, c.TaskMgr.CancelTask(c.Ctx, subtask.TaskID))
})
},
)
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
}

func TestFrameworkSubTaskInitEnvFailed(t *testing.T) {
c := testutil.NewTestDXFContext(t, 1, 16, true)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/mockExecSubtaskInitEnvErr", "return()")
schedulerExt := testutil.GetMockBasicSchedulerExt(c.MockCtrl)
stepExec := mockexecute.NewMockStepExecutor(c.MockCtrl)
stepExec.EXPECT().Init(gomock.Any()).Return(errors.New("mockExecSubtaskInitEnvErr")).AnyTimes()
executorExt := testutil.GetCommonTaskExecutorExt(c.MockCtrl, stepExec)
testutil.RegisterExampleTask(t, schedulerExt, executorExt, testutil.GetCommonCleanUpRoutine(c.MockCtrl))
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
}

func TestOwnerChangeWhenSchedule(t *testing.T) {
c := testutil.NewTestDXFContext(t, 3, 16, true)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var once sync.Once
require.NoError(t, failpoint.EnableCall("github.com/pingcap/tidb/pkg/disttask/framework/scheduler/mockOwnerChange", func() {
once.Do(func() {
Expand All @@ -178,7 +207,7 @@ func TestGC(t *testing.T) {
})
c := testutil.NewTestDXFContext(t, 3, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)

submitTaskAndCheckSuccessForBasic(c.Ctx, t, "😊", c.TestContext)

Expand Down Expand Up @@ -208,19 +237,48 @@ func TestGC(t *testing.T) {
func TestFrameworkSubtaskFinishedCancel(t *testing.T) {
c := testutil.NewTestDXFContext(t, 3, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/MockSubtaskFinishedCancel", "1*return(true)")
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var once sync.Once
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/afterOnFinishedCalled",
func(e *taskexecutor.BaseTaskExecutor) {
once.Do(func() {
e.CancelRunningSubtask()
})
},
)
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
}

func TestFrameworkRunSubtaskCancel(t *testing.T) {
func TestFrameworkRunSubtaskCancelOrFailed(t *testing.T) {
c := testutil.NewTestDXFContext(t, 3, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
testfailpoint.Enable(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/MockRunSubtaskCancel", "1*return(true)")
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
t.Run("meet cancel on run subtask", func(t *testing.T) {
var once sync.Once
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/changeRunSubtaskError",
func(errP *error) {
once.Do(func() {
*errP = taskexecutor.ErrCancelSubtask
})
},
)
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key1", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
})

t.Run("meet some error on run subtask", func(t *testing.T) {
var once sync.Once
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/taskexecutor/changeRunSubtaskError",
func(errP *error) {
once.Do(func() {
*errP = errors.New("MockExecutorRunErr")
})
},
)
task := testutil.SubmitAndWaitTask(c.Ctx, t, "key2", "", 1)
require.Equal(t, proto.TaskStateReverted, task.State)
})
}

func TestFrameworkCleanUpRoutine(t *testing.T) {
Expand All @@ -230,7 +288,7 @@ func TestFrameworkCleanUpRoutine(t *testing.T) {
}()
scheduler.DefaultCleanUpInterval = 500 * time.Millisecond
c := testutil.NewTestDXFContext(t, 3, 16, true)
testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
ch := make(chan struct{}, 1)
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/scheduler/WaitCleanUpFinished", func() {
ch <- struct{}{}
Expand Down Expand Up @@ -265,7 +323,7 @@ func TestFrameworkCleanUpRoutine(t *testing.T) {
func TestTaskCancelledBeforeUpdateTask(t *testing.T) {
c := testutil.NewTestDXFContext(t, 1, 16, true)

testutil.RegisterTaskMeta(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
registerExampleTask(t, c.MockCtrl, testutil.GetMockBasicSchedulerExt(c.MockCtrl), c.TestContext, nil)
var once sync.Once
testfailpoint.EnableCall(t, "github.com/pingcap/tidb/pkg/disttask/framework/scheduler/cancelBeforeUpdateTask", func(taskID int64) {
once.Do(func() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func (c *resourceCtrlCaseContext) init(subtaskCntMap map[int64]map[proto.Step]in
).AnyTimes()
schedulerExt.EXPECT().OnDone(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes()

testutil.RegisterTaskMetaWithDXFCtx(c.TestDXFContext, schedulerExt, func(ctx context.Context, subtask *proto.Subtask) error {
registerExampleTaskWithDXFCtx(c.TestDXFContext, schedulerExt, func(ctx context.Context, subtask *proto.Subtask) error {
ch := c.enterSubtask(subtask)
defer c.leaveSubtask(subtask)
select {
Expand Down
Loading