Skip to content

Commit

Permalink
HBASE-28656 Optimize the verifyCopyResult logic in ExportSnapshot (#5996
Browse files Browse the repository at this point in the history
)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Wei-Chiu Chuang <weichiu@apache.org>
  • Loading branch information
2005hithlj committed Jun 24, 2024
1 parent 44009a8 commit d066045
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,15 @@ public enum Counter {
BYTES_COPIED
}

/**
* Indicates the checksum comparison result.
*/
public enum ChecksumComparison {
TRUE, // checksum comparison is compatible and true.
FALSE, // checksum comparison is compatible and false.
INCOMPATIBLE, // checksum comparison is not compatible.
}

private static class ExportMapper
extends Mapper<BytesWritable, NullWritable, NullWritable, NullWritable> {
private static final Logger LOG = LoggerFactory.getLogger(ExportMapper.class);
Expand Down Expand Up @@ -530,6 +539,9 @@ private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
}
}

/**
* Utility to compare the file length and checksums for the paths specified.
*/
private void verifyCopyResult(final FileStatus inputStat, final FileStatus outputStat)
throws IOException {
long inputLen = inputStat.getLen();
Expand All @@ -544,20 +556,64 @@ private void verifyCopyResult(final FileStatus inputStat, final FileStatus outpu

// If length==0, we will skip checksum
if (inputLen != 0 && verifyChecksum) {
FileChecksum inChecksum = getFileChecksum(inputFs, inputPath);
if (inChecksum == null) {
LOG.warn("Input file " + inputPath + " checksums are not available");
}
FileChecksum outChecksum = getFileChecksum(outputFs, outputPath);
if (outChecksum == null) {
LOG.warn("Output file " + outputPath + " checksums are not available");
}
if (inChecksum != null && outChecksum != null && !inChecksum.equals(outChecksum)) {
throw new IOException("Checksum mismatch between " + inputPath + " and " + outputPath);
FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());

ChecksumComparison checksumComparison = verifyChecksum(inChecksum, outChecksum);
if (!checksumComparison.equals(ChecksumComparison.TRUE)) {
StringBuilder errMessage = new StringBuilder("Checksum mismatch between ")
.append(inputPath).append(" and ").append(outputPath).append(".");

boolean addSkipHint = false;
String inputScheme = inputFs.getScheme();
String outputScheme = outputFs.getScheme();
if (!inputScheme.equals(outputScheme)) {
errMessage.append(" Input and output filesystems are of different types.\n")
.append("Their checksum algorithms may be incompatible.");
addSkipHint = true;
} else if (inputStat.getBlockSize() != outputStat.getBlockSize()) {
errMessage.append(" Input and output differ in block-size.");
addSkipHint = true;
} else if (
inChecksum != null && outChecksum != null
&& !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
) {
errMessage.append(" Input and output checksum algorithms are of different types.");
addSkipHint = true;
}
if (addSkipHint) {
errMessage
.append(" You can choose file-level checksum validation via "
+ "-Ddfs.checksum.combine.mode=COMPOSITE_CRC when block-sizes"
+ " or filesystems are different.")
.append(" Or you can skip checksum-checks altogether with --no-checksum-verify.\n")
.append(" (NOTE: By skipping checksums, one runs the risk of "
+ "masking data-corruption during file-transfer.)\n");
}
throw new IOException(errMessage.toString());
}
}
}

/**
* Utility to compare checksums
*/
private ChecksumComparison verifyChecksum(final FileChecksum inChecksum,
final FileChecksum outChecksum) {
// If the input or output checksum is null, or the algorithms of input and output are not
// equal, that means there is no comparison
// and return not compatible. else if matched, return compatible with the matched result.
if (
inChecksum == null || outChecksum == null
|| !inChecksum.getAlgorithmName().equals(outChecksum.getAlgorithmName())
) {
return ChecksumComparison.INCOMPATIBLE;
} else if (inChecksum.equals(outChecksum)) {
return ChecksumComparison.TRUE;
}
return ChecksumComparison.FALSE;
}

/**
* Check if the two files are equal by looking at the file length, and at the checksum (if user
* has specified the verifyChecksum flag).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,23 @@ public void testConsecutiveExports() throws Exception {
removeExportDir(copyDir);
}

@Test
public void testExportWithChecksum() throws Exception {
// Test different schemes: input scheme is hdfs:// and output scheme is file://
// The checksum verification will fail
Path copyLocalDir = getLocalDestinationDir(TEST_UTIL);
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyLocalDir, false, false,
getBypassRegionPredicate(), false, true);

// Test same schemes: input scheme is hdfs:// and output scheme is hdfs://
// The checksum verification will success
Path copyHdfsDir = getHdfsDestinationDir();
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, snapshotName,
tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyHdfsDir, false, false,
getBypassRegionPredicate(), true, true);
}

@Test
public void testExportWithTargetName() throws Exception {
final byte[] targetName = Bytes.toBytes("testExportWithTargetName");
Expand Down Expand Up @@ -282,7 +299,7 @@ protected void testExportFileSystemState(final TableName tableName, final byte[]
throws Exception {
testExportFileSystemState(TEST_UTIL.getConfiguration(), tableName, snapshotName, targetName,
filesExpected, TEST_UTIL.getDefaultRootDirPath(), copyDir, overwrite, resetTtl,
getBypassRegionPredicate(), true);
getBypassRegionPredicate(), true, false);
}

/**
Expand All @@ -291,8 +308,8 @@ protected void testExportFileSystemState(final TableName tableName, final byte[]
protected static void testExportFileSystemState(final Configuration conf,
final TableName tableName, final byte[] snapshotName, final byte[] targetName,
final int filesExpected, final Path srcDir, Path rawTgtDir, final boolean overwrite,
final boolean resetTtl, final RegionPredicate bypassregionPredicate, boolean success)
throws Exception {
final boolean resetTtl, final RegionPredicate bypassregionPredicate, final boolean success,
final boolean checksumVerify) throws Exception {
FileSystem tgtFs = rawTgtDir.getFileSystem(conf);
FileSystem srcFs = srcDir.getFileSystem(conf);
Path tgtDir = rawTgtDir.makeQualified(tgtFs.getUri(), tgtFs.getWorkingDirectory());
Expand All @@ -313,6 +330,9 @@ protected static void testExportFileSystemState(final Configuration conf,
if (resetTtl) {
opts.add("--reset-ttl");
}
if (!checksumVerify) {
opts.add("--no-checksum-verify");
}

// Export Snapshot
int res = run(conf, new ExportSnapshot(), opts.toArray(new String[opts.size()]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public void testExportRetry() throws Exception {
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName, Bytes.toBytes(snapshotName),
Bytes.toBytes(snapshotName), tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true,
false, null, true);
false, null, true, false);
}

/**
Expand All @@ -170,6 +170,6 @@ public void testExportFailure() throws Exception {
conf.setInt("mapreduce.map.maxattempts", 3);
TestExportSnapshot.testExportFileSystemState(conf, tableName, Bytes.toBytes(snapshotName),
Bytes.toBytes(snapshotName), tableNumFiles, TEST_UTIL.getDefaultRootDirPath(), copyDir, true,
false, null, false);
false, null, false, false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ static void testSnapshotWithRefsExportFileSystemState(FileSystem fs,
TableName tableName = builder.getTableDescriptor().getTableName();
TestExportSnapshot.testExportFileSystemState(testUtil.getConfiguration(), tableName,
snapshotName, snapshotName, snapshotFilesCount, testDir,
getDestinationDir(fs, testUtil, testDir), false, false, null, true);
getDestinationDir(fs, testUtil, testDir), false, false, null, true, false);
}

static Path getDestinationDir(FileSystem fs, HBaseCommonTestingUtility hctu, Path testDir)
Expand Down

0 comments on commit d066045

Please sign in to comment.