From 3ba13133966c1c11639bb6f6e6ef69604cf5c797 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Mon, 2 Dec 2024 21:42:18 +0800 Subject: [PATCH] [Fix](TPartitionVersionInfo) Fix duplicate `TPartitionVersionInfo` in `PublishVersionTask.partitionVersionInfos` (#44846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What problem does this PR solve? Problem Summary: When FE handles BEs' tablet report and finds that there exists some expired txns on BE, it will generate publish version task. `TPartitionVersionInfo` with same values may be added to `transactionsToPublish` under same txn id many times when partitions involved in this failed txn involves many tablets on this BE. Because it uses `ArrayListMultimap`, these duplicate values may occupy a lot of memories when the number of tablets is large. ### Release note Fixed the issue of FE's memory occupation growing too fast in cases of persistent load and clone failures on merge-on-write tables. 修复了在merge-on-write表上有持续的导入失败和clone失败的情况下,FE使用内存增长过快的问题。 --- .../apache/doris/catalog/TabletInvertedIndex.java | 13 +++++++------ .../java/org/apache/doris/master/ReportHandler.java | 9 +++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index 4a421dc7b2b1ef..a51d1f55014fcc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -37,13 +37,14 @@ import org.apache.doris.transaction.TransactionStatus; import com.google.common.base.Preconditions; -import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.HashBasedTable; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Ordering; +import com.google.common.collect.SetMultimap; import com.google.common.collect.Sets; import com.google.common.collect.Table; import com.google.common.collect.TreeMultimap; @@ -135,7 +136,7 @@ public void tabletReport(long backendId, Map backendTablets, Set tabletFoundInMeta, ListMultimap tabletMigrationMap, Map partitionVersionSyncMap, - Map> transactionsToPublish, + Map> transactionsToPublish, ListMultimap transactionsToClear, ListMultimap tabletRecoveryMap, List tabletToUpdate, @@ -314,7 +315,7 @@ && isLocal(tabletMeta.getStorageMedium())) { } private void handleBackendTransactions(long backendId, List transactionIds, long tabletId, - TabletMeta tabletMeta, Map> transactionsToPublish, + TabletMeta tabletMeta, Map> transactionsToPublish, ListMultimap transactionsToClear) { GlobalTransactionMgrIface transactionMgr = Env.getCurrentGlobalTransactionMgr(); long partitionId = tabletMeta.getPartitionId(); @@ -376,15 +377,15 @@ private TPartitionVersionInfo generatePartitionVersionInfoWhenReport(Transaction } private void publishPartition(TransactionState transactionState, long transactionId, TabletMeta tabletMeta, - long partitionId, Map> transactionsToPublish) { + long partitionId, Map> transactionsToPublish) { TPartitionVersionInfo versionInfo = generatePartitionVersionInfoWhenReport(transactionState, transactionId, tabletMeta, partitionId); if (versionInfo != null) { synchronized (transactionsToPublish) { - ListMultimap map = transactionsToPublish.get( + SetMultimap map = transactionsToPublish.get( transactionState.getDbId()); if (map == null) { - map = ArrayListMultimap.create(); + map = LinkedHashMultimap.create(); transactionsToPublish.put(transactionState.getDbId(), map); } map.put(transactionId, versionInfo); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index bc51590ccb5a78..76e0fd4ec66b29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -95,6 +95,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Queues; +import com.google.common.collect.SetMultimap; import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -503,7 +504,7 @@ public void tabletReport(long backendId, Map backendTablets, Map partitionVersionSyncMap = Maps.newConcurrentMap(); // dbid -> txn id -> [partition info] - Map> transactionsToPublish = Maps.newHashMap(); + Map> transactionsToPublish = Maps.newHashMap(); ListMultimap transactionsToClear = LinkedListMultimap.create(); // db id -> tablet id @@ -1147,14 +1148,14 @@ private static void handleMigration(ListMultimap tabletMet } private static void handleRepublishVersionInfo( - Map> transactionsToPublish, long backendId) { + Map> transactionsToPublish, long backendId) { AgentBatchTask batchTask = new AgentBatchTask(); long createPublishVersionTaskTime = System.currentTimeMillis(); for (Long dbId : transactionsToPublish.keySet()) { - ListMultimap map = transactionsToPublish.get(dbId); + SetMultimap map = transactionsToPublish.get(dbId); for (long txnId : map.keySet()) { PublishVersionTask task = new PublishVersionTask(backendId, txnId, dbId, - map.get(txnId), createPublishVersionTaskTime); + Lists.newArrayList(map.get(txnId)), createPublishVersionTaskTime); batchTask.addTask(task); // add to AgentTaskQueue for handling finish report. AgentTaskQueue.addTask(task);