-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-28705 backup log cleaner cleans required WALs when using multiple backup roots #6040
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,39 +81,55 @@ public void init(Map<String, Object> params) { | |
} | ||
} | ||
|
||
private Map<Address, Long> getServerToNewestBackupTs(List<BackupInfo> backups) | ||
/** | ||
* Calculates the timestamp boundary up to which all backup roots have already included the WAL. | ||
* I.e. WALs with a lower (= older) or equal timestamp are no longer needed for future incremental | ||
* backups. | ||
*/ | ||
private Map<Address, Long> serverToPreservationBoundaryTs(List<BackupInfo> backups) | ||
throws IOException { | ||
if (LOG.isDebugEnabled()) { | ||
LOG.debug( | ||
"Cleaning WALs if they are older than the newest backups. " | ||
"Cleaning WALs if they are older than the newest backups (for all roots). " | ||
+ "Checking WALs against {} backups: {}", | ||
backups.size(), | ||
backups.stream().map(BackupInfo::getBackupId).sorted().collect(Collectors.joining(", "))); | ||
} | ||
Map<Address, Long> serverAddressToNewestBackupMap = new HashMap<>(); | ||
|
||
Map<TableName, Long> tableNameBackupInfoMap = new HashMap<>(); | ||
for (BackupInfo backupInfo : backups) { | ||
// This map tracks, for every backup root, the most recent created backup (= highest timestamp) | ||
Map<String, BackupInfo> newestBackupPerRootDir = new HashMap<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can there be multiple? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A backup root corresponds to the path you specify when creating a backup. E.g. (F = full backup, I = incremental backup):
So to answer your questions:
|
||
for (BackupInfo backup : backups) { | ||
BackupInfo existingEntry = newestBackupPerRootDir.get(backup.getBackupRootDir()); | ||
if (existingEntry == null || existingEntry.getStartTs() < backup.getStartTs()) { | ||
newestBackupPerRootDir.put(backup.getBackupRootDir(), backup); | ||
} | ||
} | ||
|
||
// This map tracks, for every address, the least recent (= oldest / lowest timestamp) inclusion | ||
// in any backup. In other words, it is the timestamp boundary up to which all backups roots | ||
// have included the WAL in their backup. | ||
Map<Address, Long> boundaries = new HashMap<>(); | ||
for (BackupInfo backupInfo : newestBackupPerRootDir.values()) { | ||
for (TableName table : backupInfo.getTables()) { | ||
tableNameBackupInfoMap.putIfAbsent(table, backupInfo.getStartTs()); | ||
if (tableNameBackupInfoMap.get(table) <= backupInfo.getStartTs()) { | ||
tableNameBackupInfoMap.put(table, backupInfo.getStartTs()); | ||
for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table) | ||
.entrySet()) { | ||
serverAddressToNewestBackupMap.put(Address.fromString(entry.getKey()), | ||
entry.getValue()); | ||
for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table) | ||
.entrySet()) { | ||
Address address = Address.fromString(entry.getKey()); | ||
Long storedTs = boundaries.get(address); | ||
if (storedTs == null || entry.getValue() < storedTs) { | ||
boundaries.put(address, entry.getValue()); | ||
} | ||
} | ||
} | ||
} | ||
|
||
if (LOG.isDebugEnabled()) { | ||
for (Map.Entry<Address, Long> entry : serverAddressToNewestBackupMap.entrySet()) { | ||
LOG.debug("Server: {}, Newest Backup: {}", entry.getKey().getHostName(), entry.getValue()); | ||
for (Map.Entry<Address, Long> entry : boundaries.entrySet()) { | ||
LOG.debug("Server: {}, WAL cleanup boundary: {}", entry.getKey().getHostName(), | ||
entry.getValue()); | ||
} | ||
} | ||
|
||
return serverAddressToNewestBackupMap; | ||
return boundaries; | ||
} | ||
|
||
@Override | ||
|
@@ -128,18 +144,19 @@ public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) { | |
return files; | ||
} | ||
|
||
Map<Address, Long> addressToNewestBackupMap; | ||
Map<Address, Long> serverToPreservationBoundaryTs; | ||
try { | ||
try (BackupManager backupManager = new BackupManager(conn, getConf())) { | ||
addressToNewestBackupMap = getServerToNewestBackupTs(backupManager.getBackupHistory(true)); | ||
serverToPreservationBoundaryTs = | ||
serverToPreservationBoundaryTs(backupManager.getBackupHistory(true)); | ||
} | ||
} catch (IOException ex) { | ||
LOG.error("Failed to analyse backup history with exception: {}. Retaining all logs", | ||
ex.getMessage(), ex); | ||
return Collections.emptyList(); | ||
} | ||
for (FileStatus file : files) { | ||
if (canDeleteFile(addressToNewestBackupMap, file.getPath())) { | ||
if (canDeleteFile(serverToPreservationBoundaryTs, file.getPath())) { | ||
filteredFiles.add(file); | ||
} | ||
} | ||
|
@@ -174,7 +191,7 @@ public boolean isStopped() { | |
return this.stopped; | ||
} | ||
|
||
protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupMap, Path path) { | ||
protected static boolean canDeleteFile(Map<Address, Long> addressToBoundaryTs, Path path) { | ||
if (isHMasterWAL(path)) { | ||
return true; | ||
} | ||
|
@@ -190,28 +207,28 @@ protected static boolean canDeleteFile(Map<Address, Long> addressToNewestBackupM | |
Address walServerAddress = Address.fromString(hostname); | ||
long walTimestamp = AbstractFSWALProvider.getTimestamp(path.getName()); | ||
|
||
if (!addressToNewestBackupMap.containsKey(walServerAddress)) { | ||
if (!addressToBoundaryTs.containsKey(walServerAddress)) { | ||
if (LOG.isDebugEnabled()) { | ||
LOG.debug("No backup found for server: {}. Deleting file: {}", | ||
walServerAddress.getHostName(), path); | ||
} | ||
return true; | ||
} | ||
|
||
Long lastBackupTs = addressToNewestBackupMap.get(walServerAddress); | ||
if (lastBackupTs >= walTimestamp) { | ||
Long backupBoundary = addressToBoundaryTs.get(walServerAddress); | ||
if (backupBoundary >= walTimestamp) { | ||
if (LOG.isDebugEnabled()) { | ||
LOG.debug( | ||
"Backup found for server: {}. Backup from {} is newer than file, so deleting: {}", | ||
walServerAddress.getHostName(), lastBackupTs, path); | ||
"Backup found for server: {}. All backups from {} are newer than file, so deleting: {}", | ||
walServerAddress.getHostName(), backupBoundary, path); | ||
} | ||
return true; | ||
} | ||
|
||
if (LOG.isDebugEnabled()) { | ||
LOG.debug( | ||
"Backup found for server: {}. Backup from {} is older than the file, so keeping: {}", | ||
walServerAddress.getHostName(), lastBackupTs, path); | ||
walServerAddress.getHostName(), backupBoundary, path); | ||
} | ||
} catch (Exception ex) { | ||
LOG.warn("Error occurred while filtering file: {}. Ignoring cleanup of this log", path, ex); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,13 +17,17 @@ | |
*/ | ||
package org.apache.hadoop.hbase.backup.master; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.Path; | ||
import org.apache.hadoop.hbase.HBaseClassTestRule; | ||
|
@@ -61,8 +65,10 @@ public class TestBackupLogCleaner extends TestBackupBase { | |
|
||
@Test | ||
public void testBackupLogCleaner() throws Exception { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test is not quite what you described in your earlier comment, and I'm pretty sure that it doesn't test the criteria in the same way either. It's a valid test case, but I think it's missing some cases that we would like to see covered. In this test, we have a single region server, which serves as S2 from your above example.
Based on my understanding, this test never sees a point when a wal can be discarded because wals from table4 are never rolled and so the wals identified for table4 in B1 are always the "oldest of the newest" for this server. I think that we'd like to see a test that does what you described in your initial comment: observes the "rising tide" of wal timestamp on the server as backups across roots progress. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've majorly refactored & extended the test case, and found a bug along the way. |
||
Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "root1"); | ||
Path backupRoot2 = new Path(BACKUP_ROOT_DIR, "root2"); | ||
|
||
// #1 - create full backup for all tables | ||
// Create full backup for all tables | ||
LOG.info("create full backup image for all tables"); | ||
|
||
List<TableName> tableSetFullList = Lists.newArrayList(table1, table2, table3, table4); | ||
|
@@ -71,44 +77,43 @@ public void testBackupLogCleaner() throws Exception { | |
// Verify that we have no backup sessions yet | ||
assertFalse(systemTable.hasBackupSessions()); | ||
|
||
List<FileStatus> walFiles = getListOfWALFiles(TEST_UTIL.getConfiguration()); | ||
List<FileStatus> walFilesBeforeBackup = getListOfWALFiles(TEST_UTIL.getConfiguration()); | ||
BackupLogCleaner cleaner = new BackupLogCleaner(); | ||
cleaner.setConf(TEST_UTIL.getConfiguration()); | ||
Map<String, Object> params = new HashMap<>(); | ||
params.put(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster()); | ||
cleaner.init(params); | ||
cleaner.setConf(TEST_UTIL.getConfiguration()); | ||
|
||
Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFiles); | ||
int size = Iterables.size(deletable); | ||
|
||
// We can delete all files because we do not have yet recorded backup sessions | ||
assertTrue(size == walFiles.size()); | ||
Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFilesBeforeBackup); | ||
int size = Iterables.size(deletable); | ||
assertEquals(walFilesBeforeBackup.size(), size); | ||
|
||
String backupIdFull = fullTableBackup(tableSetFullList); | ||
// Create a FULL backup (backupRoot 1) | ||
String backupIdFull = backupTables(BackupType.FULL, tableSetFullList, backupRoot1.toString()); | ||
assertTrue(checkSucceeded(backupIdFull)); | ||
// Check one more time | ||
deletable = cleaner.getDeletableFiles(walFiles); | ||
// We can delete wal files because they were saved into backup system table table | ||
size = Iterables.size(deletable); | ||
assertTrue(size == walFiles.size()); | ||
|
||
List<FileStatus> newWalFiles = getListOfWALFiles(TEST_UTIL.getConfiguration()); | ||
LOG.debug("WAL list after full backup"); | ||
// New list of WAL files is greater than the previous one, | ||
// because new WAL per RS have been opened after full backup | ||
Set<FileStatus> walFilesAfterFullBackup = | ||
mergeAsSet(walFilesBeforeBackup, getListOfWALFiles(TEST_UTIL.getConfiguration())); | ||
assertTrue(walFilesBeforeBackup.size() < walFilesAfterFullBackup.size()); | ||
|
||
// New list of wal files is greater than the previous one, | ||
// because new wal per RS have been opened after full backup | ||
assertTrue(walFiles.size() < newWalFiles.size()); | ||
// We can only delete the WALs preceding the FULL backup | ||
deletable = cleaner.getDeletableFiles(walFilesAfterFullBackup); | ||
size = Iterables.size(deletable); | ||
assertEquals(walFilesBeforeBackup.size(), size); | ||
|
||
// Insert some data | ||
Connection conn = ConnectionFactory.createConnection(conf1); | ||
DieterDP-ng marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// #2 - insert some data to table | ||
Table t1 = conn.getTable(table1); | ||
DieterDP-ng marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Put p1; | ||
for (int i = 0; i < NB_ROWS_IN_BATCH; i++) { | ||
p1 = new Put(Bytes.toBytes("row-t1" + i)); | ||
p1.addColumn(famName, qualName, Bytes.toBytes("val" + i)); | ||
t1.put(p1); | ||
} | ||
|
||
t1.close(); | ||
|
||
Table t2 = conn.getTable(table2); | ||
DieterDP-ng marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -118,23 +123,49 @@ public void testBackupLogCleaner() throws Exception { | |
p2.addColumn(famName, qualName, Bytes.toBytes("val" + i)); | ||
t2.put(p2); | ||
} | ||
|
||
t2.close(); | ||
|
||
// #3 - incremental backup for multiple tables | ||
|
||
// Create an INCREMENTAL backup (backupRoot 1) | ||
List<TableName> tableSetIncList = Lists.newArrayList(table1, table2, table3); | ||
String backupIdIncMultiple = | ||
backupTables(BackupType.INCREMENTAL, tableSetIncList, BACKUP_ROOT_DIR); | ||
backupTables(BackupType.INCREMENTAL, tableSetIncList, backupRoot1.toString()); | ||
assertTrue(checkSucceeded(backupIdIncMultiple)); | ||
deletable = cleaner.getDeletableFiles(newWalFiles); | ||
|
||
assertTrue(Iterables.size(deletable) == newWalFiles.size()); | ||
// There should be more WALs due to the rolling of Region Servers | ||
Set<FileStatus> walFilesAfterIncBackup = | ||
mergeAsSet(walFilesAfterFullBackup, getListOfWALFiles(TEST_UTIL.getConfiguration())); | ||
assertTrue(walFilesAfterFullBackup.size() < walFilesAfterIncBackup.size()); | ||
|
||
// We can only delete the WALs preceding the INCREMENTAL backup | ||
deletable = cleaner.getDeletableFiles(walFilesAfterIncBackup); | ||
size = Iterables.size(deletable); | ||
assertEquals(walFilesAfterFullBackup.size(), size); | ||
|
||
// Create a FULL backup (backupRoot 2) | ||
String backupIdFull2 = backupTables(BackupType.FULL, tableSetIncList, backupRoot2.toString()); | ||
assertTrue(checkSucceeded(backupIdFull2)); | ||
|
||
// There should be more WALs due to the rolling of Region Servers | ||
Set<FileStatus> walFilesAfterFullBackup2 = | ||
mergeAsSet(walFilesAfterFullBackup, getListOfWALFiles(TEST_UTIL.getConfiguration())); | ||
assertTrue(walFilesAfterIncBackup.size() < walFilesAfterFullBackup2.size()); | ||
|
||
// We created a backup in a different root, so the WAL dependencies of the first root did not | ||
// change. I.e. the same files should be deletable as after the incremental backup. | ||
DieterDP-ng marked this conversation as resolved.
Show resolved
Hide resolved
|
||
deletable = cleaner.getDeletableFiles(walFilesAfterFullBackup2); | ||
size = Iterables.size(deletable); | ||
assertEquals(walFilesAfterFullBackup.size(), size); | ||
|
||
conn.close(); | ||
} | ||
} | ||
|
||
private Set<FileStatus> mergeAsSet(Collection<FileStatus> toCopy, Collection<FileStatus> toAdd) { | ||
Set<FileStatus> result = new HashSet<>(toCopy); | ||
result.addAll(toAdd); | ||
return result; | ||
} | ||
|
||
@Test | ||
public void testCleansUpHMasterWal() { | ||
Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should this method have a precondition asserting that all BackupInfos in the list have an identical table set?
Otherwise I think this could cause data loss in an incremental backup. Let's say we have one-table and another-table on our cluster, and we back up each one independently. Let's say we first took a backup for one-table, then a backup for another-table. Then we passed the corresponding BackupInfos into this method, this would yield preservation boundaries which would remove WALs necessary for the next incremental backup of one-table
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see an issue. Trying to follow your example:
At time:
Following the logic in this method:
So for T1, all WALs up to t=10 can be deleted, for T2, WALs will be preserved from t=10 or t=20, depending whether other tables are present.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the explanation, both this and below. I think I follow you. Let me rephrase.
newestBackupPerRootDir
is a "race to the top" in terms of identifying the most recent timestamp across backups, while boundaries
is a "race to the bottom" in terms of identifying the least recent backup present on each server. By selecting the "oldest of the newest" you determine the minimum timestamp that must be preserved for each server. Any WALs older than this minimum timestamp can be safely discarded. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool, I think the missing piece for me was that a backup root was specific to a given table
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It isn't. Each backup root contains one or more tables, i.e. a set of tables, these table sets may or may not have tables in common.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, okay so reframing your example in a single root:
Imagine
3 servers S1, S2, S3
table T1 has regions on S1 and S2, table T2 has regions on S2 and S3
we have 1 backup root R1 that backs up T1 and T2
At time:
t=0, we backup T1 in R1 => backup B1
t=0, we backup T2 in R1 => backup B2
t=10, we backup T1 in R1 => backup B3
t=20, we backup T2 in R1 => backup B4
Following the logic in this method:
newestBackupPerRootDir
will contain: (R1: B4). We would calculate the preservation boundaries here:
Since the newest backup in R1 is B4,
serverToPreservationBoundaryTs
will contain (S1: 20, S2: 20, S3: 20). In this situation, we must not delete WALs from S1 or S2 between times 10 and 20 because a subsequent incremental backup of T1 will require those WALs.
In the BackupLogCleaner, though, we will end up calling
canDeleteFile
and hitting this code with that serverToPreservationBoundaryTs
(S1: 20, S2: 20, S3: 20) renamed as addressToBoundaryTs:
I guess this depends on how ancestry is defined? If we consider all backups in a root to be ancestors regardless of their table set, then maybe it is okay to delete these WALs. But, if not, then I don't see how it is okay?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed
I think this is wrong.
To make things more concrete, I'm going to assume all backups were full backups, and B1 == B2 (since it has the same timestamp). I.e. the backups were:
At this point, the data in
backupInfo.getTableSetTimestampMap()
will be as follows, and
serverToPreservationBoundaryTs
will be (S1: 10, S2: 10, S3: 20). The reason that the BackupInfo of B4 contains the log timestamps of tables not included in B4 is due to how the backup client updates these:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for being a pain here, but I'm not sure I agree. When building
serverToPreservationBoundaryTs
we loop through the values, and we agree that
newestBackupPerRootDir.values()
will only contain B4. So our boundaries would end up only being based on the timestamps from B4? How does the second newest backup, and beyond, in the root come into play? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Answering my own questions a bit here, I think I needed to look deeper into how the BackupInfo gets constructed. When the tableSetTimestampMap is populated, it will contain entries for every table in the root, not just every table in the given backup. So while B4 might only pertain to T2, its corresponding BackupInfo will be aware of the timestamps for T1. That's a confusing design imo, but if true then I think this logic is sound
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's indeed like that. Doing it that way is a good way to avoid some special cases where backups in the same root backup do not contain the same tables, but the docs surrounding those BackupInfo concepts is indeed lacking.