HBASE-28897 Force CF compatibility during incremental backup (#6340) (#6358)

Signed-off-by: Ray Mattingly <rmattingly@apache.org>
Co-authored-by: Hernan Romer <nanug33@gmail.com>
Co-authored-by: Hernan Gelaf-Romer <hgelafromer@hubspot.com>
3 people authored Oct 9, 2024
1 parent 30c9cb0 commit e2bece6
Showing 3 changed files with 190 additions and 8 deletions.
@@ -0,0 +1,66 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Public
public final class ColumnFamilyMismatchException extends BackupException {
  private final List<TableName> mismatchedTables;

  private ColumnFamilyMismatchException(String msg, List<TableName> mismatchedTables) {
    super(msg);
    this.mismatchedTables = mismatchedTables;
  }

  public static final class ColumnFamilyMismatchExceptionBuilder {
    private final List<TableName> mismatchedTables = new ArrayList<>();
    private final StringBuilder msg = new StringBuilder();

    public ColumnFamilyMismatchExceptionBuilder addMismatchedTable(TableName tableName,
      ColumnFamilyDescriptor[] currentCfs, ColumnFamilyDescriptor[] backupCfs) {
      this.mismatchedTables.add(tableName);

      String currentCfsParsed = StringUtils.join(currentCfs, ',');
      String backupCfsParsed = StringUtils.join(backupCfs, ',');
      msg.append("\nMismatch in column family descriptor for table: ").append(tableName)
        .append("\n");
      msg.append("Current families: ").append(currentCfsParsed).append("\n");
      msg.append("Backup families: ").append(backupCfsParsed);

      return this;
    }

    public ColumnFamilyMismatchException build() {
      return new ColumnFamilyMismatchException(msg.toString(), mismatchedTables);
    }
  }

  public List<TableName> getMismatchedTables() {
    return mismatchedTables;
  }

  public static ColumnFamilyMismatchExceptionBuilder newBuilder() {
    return new ColumnFamilyMismatchExceptionBuilder();
  }
}
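For orientation, a minimal, hedged usage sketch of the builder above. The table and family names are invented for illustration; the real call site is the verifyCfCompatibility method added in the next file.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.ColumnFamilyMismatchException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;

public class MismatchBuilderDemo {
  public static void main(String[] args) {
    // Current table has two families; the full backup only recorded one.
    ColumnFamilyDescriptor[] currentCfs =
      { ColumnFamilyDescriptorBuilder.of("f1"), ColumnFamilyDescriptorBuilder.of("f2") };
    ColumnFamilyDescriptor[] backupCfs = { ColumnFamilyDescriptorBuilder.of("f1") };

    ColumnFamilyMismatchException ex = ColumnFamilyMismatchException.newBuilder()
      .addMismatchedTable(TableName.valueOf("demo_table"), currentCfs, backupCfs)
      .build();

    // The exception carries a readable message plus the offending tables.
    System.out.println(ex.getMessage());
    System.out.println(ex.getMismatchedTables()); // [demo_table]
  }
}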
@@ -23,6 +23,7 @@
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -32,16 +33,21 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyJob;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupRequest;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.mapreduce.MapReduceBackupCopyJob;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -52,6 +58,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

/**
 * Incremental backup implementation. See the {@link #execute() execute} method.
 */
@@ -261,8 +269,11 @@ private void updateFileLists(List<String> activeFiles, List<String> archiveFiles
  }

  @Override
- public void execute() throws IOException {
+ public void execute() throws IOException, ColumnFamilyMismatchException {
    try {
      Map<TableName, String> tablesToFullBackupIds = getFullBackupIds();
      verifyCfCompatibility(backupInfo.getTables(), tablesToFullBackupIds);

      // case PREPARE_INCREMENTAL:
      beginBackup(backupManager, backupInfo);
      backupInfo.setPhase(BackupPhase.PREPARE_INCREMENTAL);
@@ -436,4 +447,86 @@ protected Path getBulkOutputDir() {
    path = new Path(path, backupId);
    return path;
  }

  private Map<TableName, String> getFullBackupIds() throws IOException {
    // Ancestors are stored from newest to oldest, so iterating backwards visits the
    // oldest images first; later puts overwrite earlier ones, leaving each table
    // mapped to its most recent full backup.
    List<BackupManifest.BackupImage> images = getAncestors(backupInfo);
    Map<TableName, String> results = new HashMap<>();
    for (int i = images.size() - 1; i >= 0; i--) {
      BackupManifest.BackupImage image = images.get(i);
      if (image.getType() != BackupType.FULL) {
        continue;
      }

      for (TableName tn : image.getTableNames()) {
        results.put(tn, image.getBackupId());
      }
    }
    return results;
  }
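To make the iteration order concrete, here is a standalone sketch of the overwrite behavior; the backup IDs and table name are invented for illustration.

// Self-contained illustration of getFullBackupIds' overwrite semantics.
// Ancestors are ordered newest -> oldest; iterating backwards visits the oldest
// first, so the newest full backup covering a table is the last one written.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AncestorOrderDemo {
  public static void main(String[] args) {
    // Hypothetical ancestor list, newest first: an incremental, then two fulls.
    List<String> ancestorsNewestFirst = List.of("inc_300", "full_200", "full_100");
    Map<String, String> tableToFullBackupId = new HashMap<>();
    for (int i = ancestorsNewestFirst.size() - 1; i >= 0; i--) {
      String id = ancestorsNewestFirst.get(i);
      if (!id.startsWith("full_")) {
        continue; // skip non-full images, as the real code does
      }
      tableToFullBackupId.put("t1", id); // pretend every image covers table t1
    }
    // full_200 (the most recent full backup) wins over full_100.
    System.out.println(tableToFullBackupId.get("t1")); // -> full_200
  }
}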

  /**
   * Verifies that the current table descriptor's column families match the descriptor CFs of the
   * last full backup for the tables. This ensures CF compatibility across incremental backups. If
   * a mismatch is detected, a full table backup should be taken, rather than an incremental one.
   */
  private void verifyCfCompatibility(Set<TableName> tables,
    Map<TableName, String> tablesToFullBackupId) throws IOException, ColumnFamilyMismatchException {
    ColumnFamilyMismatchException.ColumnFamilyMismatchExceptionBuilder exBuilder =
      ColumnFamilyMismatchException.newBuilder();
    try (Admin admin = conn.getAdmin(); BackupAdminImpl backupAdmin = new BackupAdminImpl(conn)) {
      for (TableName tn : tables) {
        String backupId = tablesToFullBackupId.get(tn);
        BackupInfo fullBackupInfo = backupAdmin.getBackupInfo(backupId);

        ColumnFamilyDescriptor[] currentCfs = admin.getDescriptor(tn).getColumnFamilies();
        String snapshotName = fullBackupInfo.getSnapshotName(tn);
        Path root = HBackupFileSystem.getTableBackupPath(tn,
          new Path(fullBackupInfo.getBackupRootDir()), fullBackupInfo.getBackupId());
        Path manifestDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, root);

        FileSystem fs;
        try {
          fs = FileSystem.get(new URI(fullBackupInfo.getBackupRootDir()), conf);
        } catch (URISyntaxException e) {
          throw new IOException("Unable to get fs", e);
        }

        SnapshotProtos.SnapshotDescription snapshotDescription =
          SnapshotDescriptionUtils.readSnapshotInfo(fs, manifestDir);
        SnapshotManifest manifest =
          SnapshotManifest.open(conf, fs, manifestDir, snapshotDescription);

        ColumnFamilyDescriptor[] backupCfs = manifest.getTableDescriptor().getColumnFamilies();
        if (!areCfsCompatible(currentCfs, backupCfs)) {
          exBuilder.addMismatchedTable(tn, currentCfs, backupCfs);
        }
      }
    }

    ColumnFamilyMismatchException ex = exBuilder.build();
    if (!ex.getMismatchedTables().isEmpty()) {
      throw ex;
    }
  }
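If this exception surfaces, the remedy the javadoc suggests is a fresh full backup. Below is a hedged sketch of that fallback from an application's point of view, not shipped behavior: the conn, tables, and backupRootDir inputs are assumptions, and since the mismatch can arrive wrapped in another IOException, the sketch inspects the root cause the same way the test below does (via the shaded Guava Throwables helper).

// Sketch: retry with a full backup when the incremental request reports CF drift.
static String backupWithFallback(Connection conn, List<TableName> tables, String backupRootDir)
    throws IOException {
  try (BackupAdminImpl backupAdmin = new BackupAdminImpl(conn)) {
    BackupRequest incremental = new BackupRequest.Builder().withBackupType(BackupType.INCREMENTAL)
      .withTableList(tables).withTargetRootDir(backupRootDir).build();
    try {
      return backupAdmin.backupTables(incremental);
    } catch (IOException e) {
      // The mismatch may surface wrapped in another IOException; check the root cause.
      if (!(Throwables.getRootCause(e) instanceof ColumnFamilyMismatchException)) {
        throw e;
      }
      // Schema drifted since the last full backup; only a fresh full backup is safe.
      BackupRequest full = new BackupRequest.Builder().withBackupType(BackupType.FULL)
        .withTableList(tables).withTargetRootDir(backupRootDir).build();
      return backupAdmin.backupTables(full);
    }
  }
}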

  private static boolean areCfsCompatible(ColumnFamilyDescriptor[] currentCfs,
    ColumnFamilyDescriptor[] backupCfs) {
    if (currentCfs.length != backupCfs.length) {
      return false;
    }

    for (int i = 0; i < backupCfs.length; i++) {
      String currentCf = currentCfs[i].getNameAsString();
      String backupCf = backupCfs[i].getNameAsString();

      if (!currentCf.equals(backupCf)) {
        return false;
      }
    }

    return true;
  }
}
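One design note: the check compares family names positionally and ignores every other attribute. getColumnFamilies() appears to return families in name-sorted order (they are kept in a sorted map internally), which is what makes the index-by-index comparison safe, and attribute-only changes such as a TTL or compression tweak will not force a full backup. A hedged illustration, using standard HBase client builder calls:

// Same name, different TTL: areCfsCompatible(current, backup) -> true.
// Renaming "f1" in either array would make it return false.
ColumnFamilyDescriptor[] current = { ColumnFamilyDescriptorBuilder
  .newBuilder(Bytes.toBytes("f1")).setTimeToLive(86400).build() };
ColumnFamilyDescriptor[] backup = { ColumnFamilyDescriptorBuilder.of("f1") };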
@@ -18,8 +18,10 @@
package org.apache.hadoop.hbase.backup;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
@@ -30,6 +32,7 @@
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupAdminImpl;
import org.apache.hadoop.hbase.backup.impl.BackupManifest;
import org.apache.hadoop.hbase.backup.impl.ColumnFamilyMismatchException;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
@@ -52,6 +55,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Throwables;
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

@@ -101,9 +106,7 @@ public void TestIncBackupRestore() throws Exception {
      insertIntoTable(conn, table1, mobName, 3, NB_ROWS_FAM3).close();
      Admin admin = conn.getAdmin();
      BackupAdminImpl client = new BackupAdminImpl(conn);
-     BackupRequest request = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR);
-     String backupIdFull = client.backupTables(request);
-     assertTrue(checkSucceeded(backupIdFull));
+     String backupIdFull = takeFullBackup(tables, client);

      // #2 - insert some data to table
      Table t1 = insertIntoTable(conn, table1, famName, 1, ADD_ROWS);
@@ -138,16 +141,14 @@
        // exception will be thrown.
        LOG.debug("region is not splittable, because " + e);
      }
-     while (!admin.isTableAvailable(table1)) {
-       Thread.sleep(100);
-     }
+     TEST_UTIL.waitTableAvailable(table1);
      long endSplitTime = EnvironmentEdgeManager.currentTime();
      // split finished
      LOG.debug("split finished in =" + (endSplitTime - startSplitTime));

      // #3 - incremental backup for multiple tables
      tables = Lists.newArrayList(table1, table2);
-     request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
+     BackupRequest request = createBackupRequest(BackupType.INCREMENTAL, tables, BACKUP_ROOT_DIR);
      String backupIdIncMultiple = client.backupTables(request);
      assertTrue(checkSucceeded(backupIdIncMultiple));
      BackupManifest manifest =
@@ -162,6 +163,13 @@
          .build();
      TEST_UTIL.getAdmin().modifyTable(newTable1Desc);

      // check that an incremental backup fails because the CFs don't match
      final List<TableName> tablesCopy = tables;
      IOException ex = assertThrows(IOException.class, () -> client
        .backupTables(createBackupRequest(BackupType.INCREMENTAL, tablesCopy, BACKUP_ROOT_DIR)));
      checkThrowsCFMismatch(ex, ImmutableList.of(table1));
      takeFullBackup(tables, client);

      int NB_ROWS_FAM2 = 7;
      Table t3 = insertIntoTable(conn, table1, fam2Name, 2, NB_ROWS_FAM2);
      t3.close();
@@ -224,4 +232,19 @@ public void TestIncBackupRestore() throws Exception {
      admin.close();
    }
  }

  private void checkThrowsCFMismatch(IOException ex, List<TableName> tables) {
    Throwable cause = Throwables.getRootCause(ex);
    assertEquals(ColumnFamilyMismatchException.class, cause.getClass());
    ColumnFamilyMismatchException e = (ColumnFamilyMismatchException) cause;
    assertEquals(tables, e.getMismatchedTables());
  }

  private String takeFullBackup(List<TableName> tables, BackupAdminImpl backupAdmin)
    throws IOException {
    BackupRequest req = createBackupRequest(BackupType.FULL, tables, BACKUP_ROOT_DIR);
    String backupId = backupAdmin.backupTables(req);
    assertTrue(checkSucceeded(backupId));
    return backupId;
  }
}
