From 3a170578b04d5947c655319f1b8d8475ccb6bb4e Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Fri, 17 May 2024 21:58:15 +0200 Subject: [PATCH] HubSpot Backport: HBASE-28502 Cleanup old backup manifest logic (#5871) (#95) In older versions of HBase's backup mechanism, a manifest was written per table being backed up. This was since refactored to one manifest per backup, but the manifest code was not updated. A concrete issue with the old code was that the manifest for full backups did not correctly list the tables included in the backup. Reviewed-by: Ray Mattingly Signed-off-by: Nick Dimiduk Co-authored-by: DieterDP <90392398+DieterDP-ng@users.noreply.github.com> --- .../hbase/backup/HBackupFileSystem.java | 44 +------------------ .../hbase/backup/impl/BackupAdminImpl.java | 8 ++-- .../hbase/backup/impl/BackupManifest.java | 2 +- .../backup/impl/RestoreTablesClient.java | 17 +++---- .../hbase/backup/impl/TableBackupClient.java | 42 +++--------------- .../hadoop/hbase/backup/util/BackupUtils.java | 7 ++- .../hadoop/hbase/backup/TestFullBackup.java | 11 +++++ .../hbase/backup/TestIncrementalBackup.java | 8 ++++ 8 files changed, 42 insertions(+), 97 deletions(-) diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java index c41a4a182435..2b27e7527477 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/HBackupFileSystem.java @@ -18,11 +18,9 @@ package org.apache.hadoop.hbase.backup; import java.io.IOException; -import java.util.HashMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.yetus.audience.InterfaceAudience; @@ -44,8 +42,8 @@ private HBackupFileSystem() { } /** - * Given the backup root dir, backup id and the table name, return the backup image location, - * which is also where the backup manifest file is. return value look like: + * Given the backup root dir, backup id and the table name, return the backup image location. + * Return value look like: * "hdfs://backup.hbase.org:9000/user/biadmin/backup/backup_1396650096738/default/t1_dn/", where * "hdfs://backup.hbase.org:9000/user/biadmin/backup" is a backup root directory * @param backupRootDir backup root directory @@ -79,11 +77,6 @@ public static Path getBackupTmpDirPathForBackupId(String backupRoot, String back return new Path(getBackupTmpDirPath(backupRoot), backupId); } - public static String getTableBackupDataDir(String backupRootDir, String backupId, - TableName tableName) { - return getTableBackupDir(backupRootDir, backupId, tableName) + Path.SEPARATOR + "data"; - } - public static Path getBackupPath(String backupRootDir, String backupId) { return new Path(backupRootDir + Path.SEPARATOR + backupId); } @@ -102,24 +95,6 @@ public static Path getTableBackupPath(TableName tableName, Path backupRootPath, return new Path(getTableBackupDir(backupRootPath.toString(), backupId, tableName)); } - /** - * Given the backup root dir and the backup id, return the log file location for an incremental - * backup. - * @param backupRootDir backup root directory - * @param backupId backup id - * @return logBackupDir: ".../user/biadmin/backup/WALs/backup_1396650096738" - */ - public static String getLogBackupDir(String backupRootDir, String backupId) { - return backupRootDir + Path.SEPARATOR + backupId + Path.SEPARATOR - + HConstants.HREGION_LOGDIR_NAME; - } - - public static Path getLogBackupPath(String backupRootDir, String backupId) { - return new Path(getLogBackupDir(backupRootDir, backupId)); - } - - // TODO we do not keep WAL files anymore - // Move manifest file to other place private static Path getManifestPath(Configuration conf, Path backupRootPath, String backupId) throws IOException { FileSystem fs = backupRootPath.getFileSystem(conf); @@ -140,19 +115,4 @@ public static BackupManifest getManifest(Configuration conf, Path backupRootPath new BackupManifest(conf, getManifestPath(conf, backupRootPath, backupId)); return manifest; } - - /** - * Check whether the backup image path and there is manifest file in the path. - * @param backupManifestMap If all the manifests are found, then they are put into this map - * @param tableArray the tables involved - * @throws IOException exception - */ - public static void checkImageManifestExist(HashMap backupManifestMap, - TableName[] tableArray, Configuration conf, Path backupRootPath, String backupId) - throws IOException { - for (TableName tableName : tableArray) { - BackupManifest manifest = getManifest(conf, backupRootPath, backupId); - backupManifestMap.put(tableName, manifest); - } - } } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java index f580fb0c47bb..69aef51a4ed3 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupAdminImpl.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -498,16 +499,15 @@ private String[] toStringArray(TableName[] list) { @Override public void restore(RestoreRequest request) throws IOException { if (request.isCheck()) { - HashMap backupManifestMap = new HashMap<>(); // check and load backup image manifest for the tables Path rootPath = new Path(request.getBackupRootDir()); String backupId = request.getBackupId(); TableName[] sTableArray = request.getFromTables(); - HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, - conn.getConfiguration(), rootPath, backupId); + BackupManifest manifest = + HBackupFileSystem.getManifest(conn.getConfiguration(), rootPath, backupId); // Check and validate the backup image and its dependencies - if (BackupUtils.validate(backupManifestMap, conn.getConfiguration())) { + if (BackupUtils.validate(Arrays.asList(sTableArray), manifest, conn.getConfiguration())) { LOG.info(CHECK_OK); } else { LOG.error(CHECK_FAILED); diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java index 3a1cbd55c58e..237d8686ab79 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManifest.java @@ -465,7 +465,7 @@ public List getTableList() { } /** - * TODO: fix it. Persist the manifest file. + * Persist the manifest file. * @throws BackupException if an error occurred while storing the manifest file. */ public void store(Configuration conf) throws BackupException { diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java index 05685c8e091e..df88d832e423 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/RestoreTablesClient.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.TreeSet; import org.apache.commons.lang3.StringUtils; @@ -203,19 +202,17 @@ private List getFilesRecursively(String fileBackupDir) /** * Restore operation. Stage 2: resolved Backup Image dependency - * @param backupManifestMap : tableName, Manifest - * @param sTableArray The array of tables to be restored - * @param tTableArray The array of mapping tables to restore to + * @param sTableArray The array of tables to be restored + * @param tTableArray The array of mapping tables to restore to * @throws IOException exception */ - private void restore(HashMap backupManifestMap, - TableName[] sTableArray, TableName[] tTableArray, boolean isOverwrite) throws IOException { + private void restore(BackupManifest manifest, TableName[] sTableArray, TableName[] tTableArray, + boolean isOverwrite) throws IOException { TreeSet restoreImageSet = new TreeSet<>(); for (int i = 0; i < sTableArray.length; i++) { TableName table = sTableArray[i]; - BackupManifest manifest = backupManifestMap.get(table); // Get the image list of this backup for restore in time order from old // to new. List list = new ArrayList<>(); @@ -255,12 +252,10 @@ public void execute() throws IOException { checkTargetTables(tTableArray, isOverwrite); // case RESTORE_IMAGES: - HashMap backupManifestMap = new HashMap<>(); // check and load backup image manifest for the tables Path rootPath = new Path(backupRootDir); - HBackupFileSystem.checkImageManifestExist(backupManifestMap, sTableArray, conf, rootPath, - backupId); + BackupManifest manifest = HBackupFileSystem.getManifest(conf, rootPath, backupId); - restore(backupManifestMap, sTableArray, tTableArray, isOverwrite); + restore(manifest, sTableArray, tTableArray, isOverwrite); } } diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java index 60dbc6470a77..e758ced3f846 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/TableBackupClient.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -268,7 +267,7 @@ public static void cleanupAndRestoreBackupSystem(Connection conn, BackupInfo bac } /** - * Add manifest for the current backup. The manifest is stored within the table backup directory. + * Creates a manifest based on the provided info, and store it in the backup-specific directory. * @param backupInfo The current backup info * @throws IOException exception */ @@ -277,43 +276,16 @@ protected void addManifest(BackupInfo backupInfo, BackupManager backupManager, B // set the overall backup phase : store manifest backupInfo.setPhase(BackupPhase.STORE_MANIFEST); - BackupManifest manifest; - - // Since we have each table's backup in its own directory structure, - // we'll store its manifest with the table directory. - for (TableName table : backupInfo.getTables()) { - manifest = new BackupManifest(backupInfo, table); - ArrayList ancestors = backupManager.getAncestors(backupInfo, table); - for (BackupImage image : ancestors) { - manifest.addDependentImage(image); - } - - if (type == BackupType.INCREMENTAL) { - // We'll store the log timestamps for this table only in its manifest. - Map> tableTimestampMap = new HashMap<>(); - tableTimestampMap.put(table, backupInfo.getIncrTimestampMap().get(table)); - manifest.setIncrTimestampMap(tableTimestampMap); - ArrayList ancestorss = backupManager.getAncestors(backupInfo); - for (BackupImage image : ancestorss) { - manifest.addDependentImage(image); - } - } - manifest.store(conf); - } - - // For incremental backup, we store a overall manifest in - // /WALs/ - // This is used when created the next incremental backup + BackupManifest manifest = new BackupManifest(backupInfo); if (type == BackupType.INCREMENTAL) { - manifest = new BackupManifest(backupInfo); // set the table region server start and end timestamps for incremental backup manifest.setIncrTimestampMap(backupInfo.getIncrTimestampMap()); - ArrayList ancestors = backupManager.getAncestors(backupInfo); - for (BackupImage image : ancestors) { - manifest.addDependentImage(image); - } - manifest.store(conf); } + ArrayList ancestors = backupManager.getAncestors(backupInfo); + for (BackupImage image : ancestors) { + manifest.addDependentImage(image); + } + manifest.store(conf); } /** diff --git a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java index d8bcfdbd6cac..9f1ee261b69e 100644 --- a/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java +++ b/hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java @@ -663,15 +663,14 @@ public static RestoreRequest createRestoreRequest(String backupRootDir, String b return request; } - public static boolean validate(HashMap backupManifestMap, + public static boolean validate(List tables, BackupManifest backupManifest, Configuration conf) throws IOException { boolean isValid = true; - for (Entry manifestEntry : backupManifestMap.entrySet()) { - TableName table = manifestEntry.getKey(); + for (TableName table : tables) { TreeSet imageSet = new TreeSet<>(); - ArrayList depList = manifestEntry.getValue().getDependentListByTable(table); + ArrayList depList = backupManifest.getDependentListByTable(table); if (depList != null && !depList.isEmpty()) { imageSet.addAll(depList); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java index 7cec06799742..ba09817fcdec 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestFullBackup.java @@ -17,10 +17,14 @@ */ package org.apache.hadoop.hbase.backup; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.util.HashSet; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.hadoop.hbase.backup.impl.BackupSystemTable; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.util.ToolRunner; @@ -30,6 +34,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; + @Category(LargeTests.class) public class TestFullBackup extends TestBackupBase { @@ -56,6 +62,11 @@ public void testFullBackupMultipleCommand() throws Exception { String backupId = data.getBackupId(); assertTrue(checkSucceeded(backupId)); } + + BackupInfo newestBackup = backups.get(0); + BackupManifest manifest = + HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), newestBackup.getBackupId()); + assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList())); } LOG.info("backup complete"); } diff --git a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java index 619e6b14e644..bc9523fc13cf 100644 --- a/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java +++ b/hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/TestIncrementalBackup.java @@ -17,15 +17,19 @@ */ package org.apache.hadoop.hbase.backup; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.List; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl; +import org.apache.hadoop.hbase.backup.impl.BackupManifest; import org.apache.hadoop.hbase.backup.util.BackupUtils; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; @@ -49,6 +53,7 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; @Category(LargeTests.class) @RunWith(Parameterized.class) @@ -145,6 +150,9 @@ public void TestIncBackupRestore() throws Exception { request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR); String backupIdIncMultiple = client.backupTables(request); assertTrue(checkSucceeded(backupIdIncMultiple)); + BackupManifest manifest = + HBackupFileSystem.getManifest(conf1, new Path(BACKUP_ROOT_DIR), backupIdIncMultiple); + assertEquals(Sets.newHashSet(table1, table2), new HashSet<>(manifest.getTableList())); // add column family f2 to table1 // drop column family f3