apache · ndimiduk · Sep 25, 2024 · Jul 2, 2024 · Jul 2, 2024 · Sep 24, 2024
diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java
@@ -81,39 +81,55 @@ public void init(Map<String, Object> params) {
  }
  }
 
- private Map<Address, Long> getServerToNewestBackupTs(List<BackupInfo> backups)
+ /**
+ * Calculates the timestamp boundary up to which all backup roots have already included the WAL.
+ * I.e. WALs with a lower (= older) or equal timestamp are no longer needed for future incremental
+ * backups.
+ */
+ private Map<Address, Long> serverToPreservationBoundaryTs(List<BackupInfo> backups)
  throws IOException {
  if (LOG.isDebugEnabled()) {
  LOG.debug(
- "Cleaning WALs if they are older than the newest backups. "
+ "Cleaning WALs if they are older than the newest backups (for all roots). "
  + "Checking WALs against {} backups: {}",
  backups.size(),
  backups.stream().map(BackupInfo::getBackupId).sorted().collect(Collectors.joining(", ")));
  }
- Map<Address, Long> serverAddressToNewestBackupMap = new HashMap<>();
 
- Map<TableName, Long> tableNameBackupInfoMap = new HashMap<>();
- for (BackupInfo backupInfo : backups) {
+ // This map tracks, for every backup root, the newest backup (= highest start timestamp)
+ Map<String, BackupInfo> newestBackupPerRootDir = new HashMap<>();
+ for (BackupInfo backup : backups) {
+ BackupInfo existingEntry = newestBackupPerRootDir.get(backup.getBackupRootDir());
+ if (existingEntry == null || existingEntry.getStartTs() < backup.getStartTs()) {
+ newestBackupPerRootDir.put(backup.getBackupRootDir(), backup);
+ }
+ }
+
+ // This map tracks, for every address, the least recent (= oldest / lowest timestamp) inclusion
+ // in the newest backup of any root. In other words, it is the timestamp boundary up to which
+ // all backup roots have included the WAL in their backup.
+ Map<Address, Long> boundaries = new HashMap<>();
+ for (BackupInfo backupInfo : newestBackupPerRootDir.values()) {
  for (TableName table : backupInfo.getTables()) {
- tableNameBackupInfoMap.putIfAbsent(table, backupInfo.getStartTs());
- if (tableNameBackupInfoMap.get(table) <= backupInfo.getStartTs()) {
- tableNameBackupInfoMap.put(table, backupInfo.getStartTs());
- for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table)
- .entrySet()) {
- serverAddressToNewestBackupMap.put(Address.fromString(entry.getKey()),
- entry.getValue());
+ for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table)
+ .entrySet()) {
+ Address address = Address.fromString(entry.getKey());
+ Long storedTs = boundaries.get(address);
+ if (storedTs == null || entry.getValue() < storedTs) {
+ boundaries.put(address, entry.getValue());
  }
  }
  }
  }
 
  if (LOG.isDebugEnabled()) {
- for (Map.Entry<Address, Long> entry : serverAddressToNewestBackupMap.entrySet()) {
- LOG.debug("Server: {}, Newest Backup: {}", entry.getKey().getHostName(), entry.getValue());
+ for (Map.Entry<Address, Long> entry : boundaries.entrySet()) {
+ LOG.debug("Server: {}, WAL cleanup boundary: {}", entry.getKey().getHostName(),
+ entry.getValue());
  }
  }
 
- return serverAddressToNewestBackupMap;
+ return boundaries;
  }
 
  @Override
@@ -128,18 +144,19 @@ public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
  return files;
  }
 
- Map<Address, Long> addressToNewestBackupMap;
+ Map<Address, Long> serverToPreservationBoundaryTs;
  try {
  try (BackupManager backupManager = new BackupManager(conn, getConf())) {
- addressToNewestBackupMap = getServerToNewestBackupTs(backupManager.getBackupHistory(true));
+ serverToPreservationBoundaryTs =
+ serverToPreservationBoundaryTs(backupManager.getBackupHistory(true));
  }
  } catch (IOException ex) {
  LOG.error("Failed to analyse backup history with exception: {}. Retaining all logs",
  ex.getMessage(), ex);
  return Collections.emptyList();
  }
  for (FileStatus file : files) {
- if (canDeleteFile(addressToNewestBackupMap, file.getPath())) {
+ if (canDeleteFile(serverToPreservationBoundaryTs, file.getPath())) {
  filteredFiles.add(file);
  }
  }
@@ -174,7 +191,7 @@ public boolean isStopped() {
  return this.stopped;
  }
 
- protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupMap, Path path) {
+ protected static boolean canDeleteFile(Map<Address, Long> addressToBoundaryTs, Path path) {
  if (isHMasterWAL(path)) {
  return true;
  }
@@ -190,28 +207,28 @@ protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupM
  Address walServerAddress = Address.fromString(hostname);
  long walTimestamp = AbstractFSWALProvider.getTimestamp(path.getName());
 
- if (!addressToNewestBackupMap.containsKey(walServerAddress)) {
+ if (!addressToBoundaryTs.containsKey(walServerAddress)) {
  if (LOG.isDebugEnabled()) {
  LOG.debug("No backup found for server: {}. Deleting file: {}",
  walServerAddress.getHostName(), path);
  }
  return true;
  }
 
- Long lastBackupTs = addressToNewestBackupMap.get(walServerAddress);
- if (lastBackupTs >= walTimestamp) {
+ Long backupBoundary = addressToBoundaryTs.get(walServerAddress);
+ if (backupBoundary >= walTimestamp) {
  if (LOG.isDebugEnabled()) {
  LOG.debug(
- "Backup found for server: {}. Backup from {} is newer than file, so deleting: {}",
- walServerAddress.getHostName(), lastBackupTs, path);
+ "Backup found for server: {}. All backups from {} are newer than file, so deleting: {}",
+ walServerAddress.getHostName(), backupBoundary, path);
  }
  return true;
  }
 
  if (LOG.isDebugEnabled()) {
  LOG.debug(
  "Backup found for server: {}. Backup from {} is older than the file, so keeping: {}",
- walServerAddress.getHostName(), lastBackupTs, path);
+ walServerAddress.getHostName(), backupBoundary, path);
  }
  } catch (Exception ex) {
  LOG.warn("Error occurred while filtering file: {}. Ignoring cleanup of this log", path, ex);

diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java
@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hbase.backup.master;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -61,8 +65,10 @@ public class TestBackupLogCleaner extends TestBackupBase {
 
  @Test
  public void testBackupLogCleaner() throws Exception {
+ Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "root1");
+ Path backupRoot2 = new Path(BACKUP_ROOT_DIR, "root2");
 
- // #1 - create full backup for all tables
+ // Create full backup for all tables
  LOG.info("create full backup image for all tables");
 
  List<TableName> tableSetFullList = Lists.newArrayList(table1, table2, table3, table4);
@@ -71,44 +77,43 @@ public void testBackupLogCleaner() throws Exception {
  // Verify that we have no backup sessions yet
  assertFalse(systemTable.hasBackupSessions());
 
- List<FileStatus> walFiles = getListOfWALFiles(TEST_UTIL.getConfiguration());
+ List<FileStatus> walFilesBeforeBackup = getListOfWALFiles(TEST_UTIL.getConfiguration());
  BackupLogCleaner cleaner = new BackupLogCleaner();
  cleaner.setConf(TEST_UTIL.getConfiguration());
  Map<String, Object> params = new HashMap<>();
  params.put(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster());
  cleaner.init(params);
  cleaner.setConf(TEST_UTIL.getConfiguration());
 
- Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFiles);
- int size = Iterables.size(deletable);
-
  // We can delete all files because we do not have yet recorded backup sessions
- assertTrue(size == walFiles.size());
+ Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFilesBeforeBackup);
+ int size = Iterables.size(deletable);
+ assertEquals(walFilesBeforeBackup.size(), size);
 
- String backupIdFull = fullTableBackup(tableSetFullList);
+ // Create a FULL backup (backupRoot 1)
+ String backupIdFull = backupTables(BackupType.FULL, tableSetFullList, backupRoot1.toString());
  assertTrue(checkSucceeded(backupIdFull));
- // Check one more time
- deletable = cleaner.getDeletableFiles(walFiles);
- // We can delete wal files because they were saved into backup system table table
- size = Iterables.size(deletable);
- assertTrue(size == walFiles.size());
 
- List<FileStatus> newWalFiles = getListOfWALFiles(TEST_UTIL.getConfiguration());
- LOG.debug("WAL list after full backup");
+ // The new list of WAL files is larger than the previous one,
+ // because a new WAL per RS has been opened after the full backup
+ Set<FileStatus> walFilesAfterFullBackup =
+ mergeAsSet(walFilesBeforeBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
+ assertTrue(walFilesBeforeBackup.size() < walFilesAfterFullBackup.size());
 
- // New list of wal files is greater than the previous one,
- // because new wal per RS have been opened after full backup
- assertTrue(walFiles.size() < newWalFiles.size());
+ // We can only delete the WALs preceding the FULL backup
+ deletable = cleaner.getDeletableFiles(walFilesAfterFullBackup);
+ size = Iterables.size(deletable);
+ assertEquals(walFilesBeforeBackup.size(), size);
+
+ // Insert some data
  Connection conn = ConnectionFactory.createConnection(conf1);
- // #2 - insert some data to table
  Table t1 = conn.getTable(table1);
  Put p1;
  for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
  p1 = new Put(Bytes.toBytes("row-t1" + i));
  p1.addColumn(famName, qualName, Bytes.toBytes("val" + i));
  t1.put(p1);
  }
-
  t1.close();
 
  Table t2 = conn.getTable(table2);
@@ -118,23 +123,49 @@ public void testBackupLogCleaner() throws Exception {
  p2.addColumn(famName, qualName, Bytes.toBytes("val" + i));
  t2.put(p2);
  }
-
  t2.close();
 
- // #3 - incremental backup for multiple tables
-
+ // Create an INCREMENTAL backup (backupRoot 1)
  List<TableName> tableSetIncList = Lists.newArrayList(table1, table2, table3);
  String backupIdIncMultiple =
- backupTables(BackupType.INCREMENTAL, tableSetIncList, BACKUP_ROOT_DIR);
+ backupTables(BackupType.INCREMENTAL, tableSetIncList, backupRoot1.toString());
  assertTrue(checkSucceeded(backupIdIncMultiple));
- deletable = cleaner.getDeletableFiles(newWalFiles);
 
- assertTrue(Iterables.size(deletable) == newWalFiles.size());
+ // There should be more WALs due to the rolling of Region Servers
+ Set<FileStatus> walFilesAfterIncBackup =
+ mergeAsSet(walFilesAfterFullBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
+ assertTrue(walFilesAfterFullBackup.size() < walFilesAfterIncBackup.size());
+
+ // We can only delete the WALs preceding the INCREMENTAL backup
+ deletable = cleaner.getDeletableFiles(walFilesAfterIncBackup);
+ size = Iterables.size(deletable);
+ assertEquals(walFilesAfterFullBackup.size(), size);
+
+ // Create a FULL backup (backupRoot 2)
+ String backupIdFull2 = backupTables(BackupType.FULL, tableSetIncList, backupRoot2.toString());
+ assertTrue(checkSucceeded(backupIdFull2));
+
+ // There should be more WALs due to the rolling of Region Servers
+ Set<FileStatus> walFilesAfterFullBackup2 =
+ mergeAsSet(walFilesAfterFullBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
+ assertTrue(walFilesAfterIncBackup.size() < walFilesAfterFullBackup2.size());
+
+ // We created a backup in a different root, so the WAL dependencies of the first root did not
+ // change. I.e. the same files should be deletable as after the incremental backup.
+ deletable = cleaner.getDeletableFiles(walFilesAfterFullBackup2);
+ size = Iterables.size(deletable);
+ assertEquals(walFilesAfterFullBackup.size(), size);
 
  conn.close();
  }
  }
 
+ private Set<FileStatus> mergeAsSet(Collection<FileStatus> toCopy, Collection<FileStatus> toAdd) {
+ Set<FileStatus> result = new HashSet<>(toCopy);
+ result.addAll(toAdd);
+ return result;
+ }
+
  @Test
  public void testCleansUpHMasterWal() {
  Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");