From 8170041b1deb671d80885e196a64e874df6ca8fc Mon Sep 17 00:00:00 2001 From: Xiangyi Zhu <82511136+zhuxiangyi@users.noreply.github.com> Date: Mon, 5 Jan 2026 15:27:52 +0800 Subject: [PATCH] [fix](rebanlace) resolving data inconsistencies caused by rebanlace --- .../apache/doris/clone/TabletScheduler.java | 40 ++- ...etSchedulerBalanceWithTransactionTest.java | 302 ++++++++++++++++++ 2 files changed, 339 insertions(+), 3 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/clone/TabletSchedulerBalanceWithTransactionTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index a15830efe0151b..10d52eb169fc7c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -596,14 +596,48 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) } try { + long partitionId = tabletCtx.getPartitionId(); + // Check PRECOMMITTED transactions at partition level + // Use DebugPoint to simulate PRECOMMITTED transaction for testing + if (DebugPointUtil.isEnable("TabletScheduler.checkPreCommittedTransaction.return_true")) { + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, + "There exists PRECOMMITTED transaction related to partition " + + partitionId); + } for (TransactionState transactionState : Env.getCurrentGlobalTransactionMgr().getPreCommittedTxnList(db.getId())) { if (transactionState.getTableIdList().contains(tbl.getId())) { - // If table releate to transaction with precommitted status, do not allow to do balance. - throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, - "There exists PRECOMMITTED transaction related to table"); + // Check if this transaction affects the specific partition + org.apache.doris.transaction.TableCommitInfo tableCommitInfo = + transactionState.getTableCommitInfo(tbl.getId()); + if (tableCommitInfo != null + && tableCommitInfo.getPartitionCommitInfo(partitionId) != null) { + // If partition has PRECOMMITTED transaction, do not allow to do balance. + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, + "There exists PRECOMMITTED transaction related to partition " + + partitionId); + } } } + + // Check COMMITTED but not VISIBLE transactions at partition level (especially for broker load) + // This is critical because broker load may have committed transactions that are not yet visible. + // If we allow balance during this period, the clone task may miss data that is committed + // but not yet visible, leading to data inconsistency. + // We check at partition level instead of table level to be more precise and avoid + // unnecessarily blocking balance for other partitions. + // Use DebugPoint to simulate COMMITTED transaction for testing + boolean hasCommittedTxns = DebugPointUtil.isEnable( + "GlobalTransactionMgr.existCommittedTxns.return_true") + || Env.getCurrentGlobalTransactionMgr().existCommittedTxns( + db.getId(), tbl.getId(), partitionId); + if (hasCommittedTxns) { + // If partition has COMMITTED but not VISIBLE transaction, do not allow to do balance. + // This prevents data inconsistency during broker load. + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, + "There exists COMMITTED but not VISIBLE transaction related to partition " + + partitionId + ", waiting for transaction to become visible before balance"); + } } catch (AnalysisException e) { // CHECKSTYLE IGNORE THIS LINE LOG.warn("Exception:", e); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TabletSchedulerBalanceWithTransactionTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletSchedulerBalanceWithTransactionTest.java new file mode 100644 index 00000000000000..c5b33f3f7bda75 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TabletSchedulerBalanceWithTransactionTest.java @@ -0,0 +1,302 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.clone; + +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.ReplicaAllocation; +import org.apache.doris.catalog.Table; +import org.apache.doris.clone.TabletSchedCtx.BalanceType; +import org.apache.doris.clone.TabletSchedCtx.Type; +import org.apache.doris.common.Config; +import org.apache.doris.common.util.DebugPointUtil; +import org.apache.doris.resource.Tag; +import org.apache.doris.system.Backend; +import org.apache.doris.transaction.GlobalTransactionMgrIface; +import org.apache.doris.utframe.TestWithFeService; + +import com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** + * Test cases for TabletScheduler balance with transaction protection. + *
+ * This test verifies that balance operations are properly blocked when there are + * PRECOMMITTED or COMMITTED but not VISIBLE transactions, preventing data inconsistency + * during broker load operations. + *
+ * Note: This test uses reflection to access private methods for testing purposes.
+ */
+public class TabletSchedulerBalanceWithTransactionTest extends TestWithFeService {
+ private Database db;
+ private OlapTable testTable;
+ private long partitionId;
+ private TabletScheduler scheduler;
+ private GlobalTransactionMgrIface globalTransactionMgr;
+
+ @Override
+ protected void beforeCreatingConnectContext() throws Exception {
+ Config.enable_debug_points = true;
+ Config.allow_replica_on_same_host = false;
+ Config.disable_balance = false; // Enable balance for testing
+ }
+
+ @Override
+ protected int backendNum() {
+ return 3;
+ }
+
+ @Override
+ protected void runBeforeAll() throws Exception {
+ Thread.sleep(1000);
+ createDatabase("test");
+ useDatabase("test");
+ db = Env.getCurrentInternalCatalog().getDbOrMetaException("test");
+
+ // Create a test table with 2 replicas
+ String createTableSql = "CREATE TABLE test_table (\n"
+ + " k1 INT NOT NULL,\n"
+ + " k2 INT NOT NULL,\n"
+ + " v1 INT SUM\n"
+ + ") ENGINE=OLAP\n"
+ + "AGGREGATE KEY(k1, k2)\n"
+ + "DISTRIBUTED BY HASH(k1) BUCKETS 1\n"
+ + "PROPERTIES (\n"
+ + " \"replication_num\" = \"2\"\n"
+ + ");";
+ createTable(createTableSql);
+
+ testTable = (OlapTable) db.getTableOrMetaException("test_table");
+ partitionId = testTable.getPartitions().iterator().next().getId();
+
+ scheduler = Env.getCurrentEnv().getTabletScheduler();
+ globalTransactionMgr = Env.getCurrentGlobalTransactionMgr();
+ }
+
+ @Override
+ protected void runBeforeEach() throws Exception {
+ // Clean up before each test
+ for (Table table : db.getTables()) {
+ if (!table.getName().equals("test_table")) {
+ dropTable(table.getName(), true);
+ }
+ }
+ for (Backend be : Env.getCurrentSystemInfo().getBackendsByTag(Tag.DEFAULT_BACKEND_TAG)) {
+ be.setDecommissioned(false);
+ }
+ scheduler.clear();
+ DebugPointUtil.clearDebugPoints();
+ }
+
+ /**
+ * Test that balance is blocked when there is a PRECOMMITTED transaction for the partition.
+ * Uses reflection to test the private scheduleTablet method.
+ */
+ @Test
+ public void testBalanceBlockedByPreCommittedTransaction() throws Exception {
+ // Use DebugPoint to simulate PRECOMMITTED transaction check
+ // This simulates the scenario where there is a PRECOMMITTED transaction
+ DebugPointUtil.addDebugPoint("TabletScheduler.checkPreCommittedTransaction.return_true");
+
+ // Create a balance tablet context
+ TabletSchedCtx tabletCtx = createBalanceTabletCtx(testTable);
+
+ // Try to add the tablet - it should be added but blocked during scheduling
+ TabletScheduler.AddResult result = scheduler.addTablet(tabletCtx, false);
+ Assert.assertEquals("Tablet should be added to pending queue",
+ TabletScheduler.AddResult.ADDED, result);
+
+ // Run scheduler - it should not schedule the tablet due to PRECOMMITTED transaction
+ scheduler.runAfterCatalogReady();
+
+ // Verify tablet is still in pending queue (not scheduled)
+ // The tablet should remain in pending queue because scheduleTablet throws SchedException
+ // which is caught and tablet is not moved to running
+ List