HBASE-28705 BackupLogCleaner cleans required WALs when using multiple backuproots

The BackupLogCleaner is responsible for preventing the deletion of WALs
(write-ahead logs) that still need to be included in a future backup.

The logic that decides which files can be deleted did not work correctly when
multiple backup roots are used. Each backup root has its own chain of backups
(full, incremental1, incremental2, ...), so a log must be preserved as long as
any chain still requires it. This was not the case: the cleaner kept only the
newest backup timestamp per server, regardless of root.

As a result, logs could be incorrectly deleted, causing data loss in backups.
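
To make the fix concrete, the following is a minimal, standalone Java sketch of the corrected boundary computation (hostnames and timestamps are invented for illustration; the real code works with BackupInfo and Address rather than plain strings). Each backup root contributes the per-server timestamps of its newest backup, and the preservation boundary for a server is the minimum across roots; the old logic effectively kept the maximum, letting a recent backup in one root mask WALs another root still needed.

import java.util.HashMap;
import java.util.Map;

public class PreservationBoundaryExample {
  public static void main(String[] args) {
    // Per-server WAL timestamps covered by the newest backup of each root
    // (illustrative values only).
    Map<String, Long> newestInRoot1 = Map.of("rs1.example.com", 300L, "rs2.example.com", 250L);
    Map<String, Long> newestInRoot2 = Map.of("rs1.example.com", 100L, "rs2.example.com", 400L);

    // Boundary = minimum across roots: only WALs at or below it are covered by every chain.
    Map<String, Long> boundary = new HashMap<>(newestInRoot1);
    newestInRoot2.forEach((server, ts) -> boundary.merge(server, ts, Math::min));

    // Prints rs1.example.com -> 100 and rs2.example.com -> 250. A WAL from rs1 with
    // timestamp 200 must be kept: root2's chain has not included it yet. Comparing
    // against the newest backup overall (300) would wrongly allow deleting it.
    boundary.forEach((server, ts) -> System.out.println(server + " -> " + ts));
  }
}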
DieterDP-ng committed Sep 17, 2024
1 parent f371d51 commit 7d76842
Showing 2 changed files with 63 additions and 29 deletions.
BackupLogCleaner.java

@@ -81,39 +81,55 @@ public void init(Map<String, Object> params) {
     }
   }
 
-  private Map<Address, Long> getServerToNewestBackupTs(List<BackupInfo> backups)
+  /**
+   * Calculates the timestamp boundary up to which all backup roots have already included the WAL.
+   * I.e. WALs with a lower (= older) or equal timestamp are no longer needed for future incremental
+   * backups.
+   */
+  private Map<Address, Long> serverToPreservationBoundaryTs(List<BackupInfo> backups)
     throws IOException {
     if (LOG.isDebugEnabled()) {
       LOG.debug(
-        "Cleaning WALs if they are older than the newest backups. "
+        "Cleaning WALs if they are older than the newest backups (for all roots). "
           + "Checking WALs against {} backups: {}",
         backups.size(),
         backups.stream().map(BackupInfo::getBackupId).sorted().collect(Collectors.joining(", ")));
     }
-    Map<Address, Long> serverAddressToNewestBackupMap = new HashMap<>();
 
-    Map<TableName, Long> tableNameBackupInfoMap = new HashMap<>();
-    for (BackupInfo backupInfo : backups) {
+    // This map tracks, for every backup root, the most recently created backup (= highest timestamp)
+    Map<String, BackupInfo> newestBackupPerRootDir = new HashMap<>();
+    for (BackupInfo backup : backups) {
+      BackupInfo existingEntry = newestBackupPerRootDir.get(backup.getBackupRootDir());
+      if (existingEntry == null || existingEntry.getStartTs() < backup.getStartTs()) {
+        newestBackupPerRootDir.put(backup.getBackupRootDir(), backup);
+      }
+    }
+
+    // This map tracks, for every address, the least recent (= oldest / lowest timestamp) inclusion
+    // in any backup. In other words, it is the timestamp boundary up to which all backup roots
+    // have included the WAL in their backup.
+    Map<Address, Long> boundaries = new HashMap<>();
+    for (BackupInfo backupInfo : newestBackupPerRootDir.values()) {
       for (TableName table : backupInfo.getTables()) {
-        tableNameBackupInfoMap.putIfAbsent(table, backupInfo.getStartTs());
-        if (tableNameBackupInfoMap.get(table) <= backupInfo.getStartTs()) {
-          tableNameBackupInfoMap.put(table, backupInfo.getStartTs());
-          for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table)
-            .entrySet()) {
-            serverAddressToNewestBackupMap.put(Address.fromString(entry.getKey()),
-              entry.getValue());
+        for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table)
+          .entrySet()) {
+          Address address = Address.fromString(entry.getKey());
+          Long storedTs = boundaries.get(address);
+          if (storedTs == null || entry.getValue() < storedTs) {
+            boundaries.put(address, entry.getValue());
           }
         }
       }
     }
 
     if (LOG.isDebugEnabled()) {
-      for (Map.Entry<Address, Long> entry : serverAddressToNewestBackupMap.entrySet()) {
-        LOG.debug("Server: {}, Newest Backup: {}", entry.getKey().getHostName(), entry.getValue());
+      for (Map.Entry<Address, Long> entry : boundaries.entrySet()) {
+        LOG.debug("Server: {}, WAL cleanup boundary: {}", entry.getKey().getHostName(),
+          entry.getValue());
       }
     }
 
-    return serverAddressToNewestBackupMap;
+    return boundaries;
   }
 
   @Override
@@ -128,18 +144,19 @@ public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
       return files;
     }
 
-    Map<Address, Long> addressToNewestBackupMap;
+    Map<Address, Long> serverToPreservationBoundaryTs;
     try {
       try (BackupManager backupManager = new BackupManager(conn, getConf())) {
-        addressToNewestBackupMap = getServerToNewestBackupTs(backupManager.getBackupHistory(true));
+        serverToPreservationBoundaryTs =
+          serverToPreservationBoundaryTs(backupManager.getBackupHistory(true));
       }
     } catch (IOException ex) {
       LOG.error("Failed to analyse backup history with exception: {}. Retaining all logs",
         ex.getMessage(), ex);
       return Collections.emptyList();
     }
     for (FileStatus file : files) {
-      if (canDeleteFile(addressToNewestBackupMap, file.getPath())) {
+      if (canDeleteFile(serverToPreservationBoundaryTs, file.getPath())) {
         filteredFiles.add(file);
       }
     }
@@ -174,7 +191,7 @@ public boolean isStopped() {
     return this.stopped;
   }
 
-  protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupMap, Path path) {
+  protected static boolean canDeleteFile(Map<Address, Long> addressToBoundaryTs, Path path) {
     if (isHMasterWAL(path)) {
       return true;
     }
@@ -190,28 +207,28 @@ protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupMap, Path path) {
       Address walServerAddress = Address.fromString(hostname);
       long walTimestamp = AbstractFSWALProvider.getTimestamp(path.getName());
 
-      if (!addressToNewestBackupMap.containsKey(walServerAddress)) {
+      if (!addressToBoundaryTs.containsKey(walServerAddress)) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("No backup found for server: {}. Deleting file: {}",
             walServerAddress.getHostName(), path);
         }
         return true;
       }
 
-      Long lastBackupTs = addressToNewestBackupMap.get(walServerAddress);
-      if (lastBackupTs >= walTimestamp) {
+      Long backupBoundary = addressToBoundaryTs.get(walServerAddress);
+      if (backupBoundary >= walTimestamp) {
         if (LOG.isDebugEnabled()) {
           LOG.debug(
-            "Backup found for server: {}. Backup from {} is newer than file, so deleting: {}",
-            walServerAddress.getHostName(), lastBackupTs, path);
+            "Backup found for server: {}. All backups from {} are newer than file, so deleting: {}",
+            walServerAddress.getHostName(), backupBoundary, path);
         }
         return true;
       }
 
       if (LOG.isDebugEnabled()) {
         LOG.debug(
           "Backup found for server: {}. Backup from {} is older than the file, so keeping: {}",
-          walServerAddress.getHostName(), lastBackupTs, path);
+          walServerAddress.getHostName(), backupBoundary, path);
       }
     } catch (Exception ex) {
       LOG.warn("Error occurred while filtering file: {}. Ignoring cleanup of this log", path, ex);
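
For intuition, the per-file decision above reduces to a single comparison against the per-server boundary. A minimal sketch, assuming the usual HBase WAL file-name layout <host>%2C<port>%2C<startcode>.<timestamp> (the helper and parsing here are simplified stand-ins; the real code uses HBase utilities such as AbstractFSWALProvider.getTimestamp):

import java.util.Map;

public class WalBoundaryCheck {
  // Returns true if the WAL is already covered by every backup root's chain.
  static boolean deletable(Map<String, Long> boundaryPerHost, String walFileName) {
    String host = walFileName.substring(0, walFileName.indexOf("%2C"));
    long walTs = Long.parseLong(walFileName.substring(walFileName.lastIndexOf('.') + 1));
    Long boundary = boundaryPerHost.get(host);
    // No entry: no backup chain references this server, so the WAL is deletable.
    return boundary == null || boundary >= walTs;
  }

  public static void main(String[] args) {
    Map<String, Long> boundaries = Map.of("rs1.example.com", 100L);
    // ts=200 is past the boundary: some root still needs it -> keep (false).
    System.out.println(deletable(boundaries, "rs1.example.com%2C16020%2C1690000000000.200"));
    // ts=50 is within the boundary: every root has it -> safe to delete (true).
    System.out.println(deletable(boundaries, "rs1.example.com%2C16020%2C1690000000000.50"));
  }
}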
TestBackupLogCleaner.java

@@ -65,6 +65,8 @@ public class TestBackupLogCleaner extends TestBackupBase {
 
   @Test
   public void testBackupLogCleaner() throws Exception {
+    Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "root1");
+    Path backupRoot2 = new Path(BACKUP_ROOT_DIR, "root2");
 
     // Create full backup for all tables
     LOG.info("create full backup image for all tables");
@@ -88,8 +90,8 @@ public void testBackupLogCleaner() throws Exception {
     int size = Iterables.size(deletable);
     assertEquals(walFilesBeforeBackup.size(), size);
 
-    // Create a FULL backup
-    String backupIdFull = fullTableBackup(tableSetFullList);
+    // Create a FULL backup (backupRoot 1)
+    String backupIdFull = backupTables(BackupType.FULL, tableSetFullList, backupRoot1.toString());
     assertTrue(checkSucceeded(backupIdFull));
 
     // New list of WAL files is greater than the previous one,
@@ -123,10 +125,10 @@ public void testBackupLogCleaner() throws Exception {
     }
     t2.close();
 
-    // Create an INCREMENTAL backup
+    // Create an INCREMENTAL backup (backupRoot 1)
     List<TableName> tableSetIncList = Lists.newArrayList(table1, table2, table3);
     String backupIdIncMultiple =
-      backupTables(BackupType.INCREMENTAL, tableSetIncList, BACKUP_ROOT_DIR);
+      backupTables(BackupType.INCREMENTAL, tableSetIncList, backupRoot1.toString());
     assertTrue(checkSucceeded(backupIdIncMultiple));
 
     // There should be more WALs due to the rolling of Region Servers
@@ -139,6 +141,21 @@ public void testBackupLogCleaner() throws Exception {
     size = Iterables.size(deletable);
     assertEquals(walFilesAfterFullBackup.size(), size);
 
+    // Create a FULL backup (backupRoot 2)
+    String backupIdFull2 = backupTables(BackupType.FULL, tableSetIncList, backupRoot2.toString());
+    assertTrue(checkSucceeded(backupIdFull2));
+
+    // There should be more WALs due to the rolling of Region Servers
+    Set<FileStatus> walFilesAfterFullBackup2 =
+      mergeAsSet(walFilesAfterFullBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
+    assertTrue(walFilesAfterIncBackup.size() < walFilesAfterFullBackup2.size());
+
+    // We created a backup in a different root, so the WAL dependencies of the first root did not
+    // change. I.e. the same files should be deletable as after the incremental backup.
+    deletable = cleaner.getDeletableFiles(walFilesAfterFullBackup2);
+    size = Iterables.size(deletable);
+    assertEquals(walFilesAfterFullBackup.size(), size);
+
     conn.close();
   }
 }
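
The added assertions pin down the key invariant: creating a backup in a second root must not change which WALs are deletable for the first root. A tiny sketch of the arithmetic behind the final assertEquals (timestamps invented): since the boundary is a minimum across roots, a newer backup in root2 cannot raise it.

import java.util.HashMap;
import java.util.Map;

public class SecondRootNoEffect {
  public static void main(String[] args) {
    // Boundary while only root1 has backups (its incremental covered up to ts=200).
    Map<String, Long> before = Map.of("rs1.example.com", 200L);

    // root2 takes a full backup covering up to ts=500; min(200, 500) stays 200.
    Map<String, Long> after = new HashMap<>(before);
    Map.of("rs1.example.com", 500L).forEach((s, ts) -> after.merge(s, ts, Math::min));

    System.out.println(before.equals(after)); // true: the deletable set is unchanged
  }
}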
