-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-29219 Ignore Empty WAL Files While Consuming Backed-Up WAL Files #7106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ddba129
7b16f17
c17942a
261d066
d8f931a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -328,14 +328,21 @@ List<InputSplit> getSplits(final JobContext context, final String startKey, fina | |
| throw e; | ||
| } | ||
| } | ||
|
|
||
| boolean ignoreEmptyFiles = | ||
| conf.getBoolean(WALPlayer.IGNORE_EMPTY_FILES, WALPlayer.DEFAULT_IGNORE_EMPTY_FILES); | ||
| List<InputSplit> splits = new ArrayList<InputSplit>(allFiles.size()); | ||
| for (FileStatus file : allFiles) { | ||
| if (ignoreEmptyFiles && file.getLen() == 0) { | ||
| LOG.warn("Ignoring empty file: " + file.getPath()); | ||
| continue; | ||
| } | ||
| splits.add(new WALSplit(file.getPath().toString(), file.getLen(), startTime, endTime)); | ||
| } | ||
| return splits; | ||
| } | ||
|
|
||
| private Path[] getInputPaths(Configuration conf) { | ||
| Path[] getInputPaths(Configuration conf) { | ||
| String inpDirs = conf.get(FileInputFormat.INPUT_DIR); | ||
| return StringUtils | ||
| .stringToPath(inpDirs.split(conf.get(WALPlayer.INPUT_FILES_SEPARATOR_KEY, ","))); | ||
|
|
@@ -349,7 +356,7 @@ private Path[] getInputPaths(Configuration conf) { | |
| * equal to this value else we will filter out the file. If name does not seem to | ||
| * have a timestamp, we will just return it w/o filtering. | ||
| */ | ||
| private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime) | ||
| List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: If this is only for testing, can we add @VisibleForTesting here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, this is used in |
||
| throws IOException { | ||
| List<FileStatus> result = new ArrayList<>(); | ||
| LOG.debug("Scanning " + dir.toString() + " for WAL files"); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -22,6 +22,7 @@ | |||||||||
| import static org.hamcrest.CoreMatchers.nullValue; | ||||||||||
| import static org.hamcrest.MatcherAssert.assertThat; | ||||||||||
| import static org.junit.Assert.assertEquals; | ||||||||||
| import static org.junit.Assert.assertNotEquals; | ||||||||||
| import static org.junit.Assert.assertTrue; | ||||||||||
| import static org.junit.Assert.fail; | ||||||||||
| import static org.mockito.ArgumentMatchers.any; | ||||||||||
|
|
@@ -31,10 +32,12 @@ | |||||||||
|
|
||||||||||
| import java.io.ByteArrayOutputStream; | ||||||||||
| import java.io.File; | ||||||||||
| import java.io.IOException; | ||||||||||
| import java.io.PrintStream; | ||||||||||
| import java.util.ArrayList; | ||||||||||
| import java.util.concurrent.ThreadLocalRandom; | ||||||||||
| import org.apache.hadoop.conf.Configuration; | ||||||||||
| import org.apache.hadoop.fs.FSDataOutputStream; | ||||||||||
| import org.apache.hadoop.fs.FileSystem; | ||||||||||
| import org.apache.hadoop.fs.Path; | ||||||||||
| import org.apache.hadoop.hbase.Cell; | ||||||||||
|
|
@@ -338,4 +341,47 @@ public void testMainMethod() throws Exception { | |||||||||
|
|
||||||||||
| } | ||||||||||
|
|
||||||||||
| @Test | ||||||||||
| public void testIgnoreEmptyWALFiles() throws Exception { | ||||||||||
| Path inputDir = createEmptyWALFile("empty-wal-dir"); | ||||||||||
| FileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem(); | ||||||||||
| Path emptyWAL = new Path(inputDir, "empty.wal"); | ||||||||||
|
|
||||||||||
| assertTrue("Empty WAL file should exist", dfs.exists(emptyWAL)); | ||||||||||
| assertEquals("WAL file should be 0 bytes", 0, dfs.getFileStatus(emptyWAL).getLen()); | ||||||||||
|
|
||||||||||
| Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); | ||||||||||
| conf.setBoolean(WALPlayer.IGNORE_EMPTY_FILES, true); | ||||||||||
|
|
||||||||||
| int exitCode = ToolRunner.run(conf, new WALPlayer(conf), new String[] { inputDir.toString() }); | ||||||||||
| assertEquals("WALPlayer should exit cleanly even with empty files", 0, exitCode); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| @Test | ||||||||||
| public void testFailOnEmptyWALFilesWhenNotIgnored() throws Exception { | ||||||||||
| Path inputDir = createEmptyWALFile("fail-empty-wal-dir"); | ||||||||||
| FileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem(); | ||||||||||
| Path emptyWAL = new Path(inputDir, "empty.wal"); | ||||||||||
|
|
||||||||||
| assertTrue("Empty WAL file should exist", dfs.exists(emptyWAL)); | ||||||||||
| assertEquals("WAL file should be 0 bytes", 0, dfs.getFileStatus(emptyWAL).getLen()); | ||||||||||
|
|
||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: For consistency, maybe add the same assertions you have in the other test method regarding the newly created empty WAL file:
Suggested change
|
||||||||||
| Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); | ||||||||||
| conf.setBoolean(WALPlayer.IGNORE_EMPTY_FILES, false); | ||||||||||
|
|
||||||||||
| int exitCode = ToolRunner.run(conf, new WALPlayer(conf), new String[] { inputDir.toString() }); | ||||||||||
| assertNotEquals("WALPlayer should fail on empty files when not ignored", 0, exitCode); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| private Path createEmptyWALFile(String walDir) throws IOException { | ||||||||||
| FileSystem dfs = TEST_UTIL.getDFSCluster().getFileSystem(); | ||||||||||
| Path inputDir = new Path("/" + walDir); | ||||||||||
| dfs.mkdirs(inputDir); | ||||||||||
|
|
||||||||||
| Path emptyWAL = new Path(inputDir, "empty.wal"); | ||||||||||
| FSDataOutputStream out = dfs.create(emptyWAL); | ||||||||||
| out.close(); // Explicitly closing the stream | ||||||||||
|
|
||||||||||
| return inputDir; | ||||||||||
| } | ||||||||||
| } | ||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so, this flag basically is only used for Continuous Backup? and should it be false other than the use case of Continuous Backup ?
nit: maybe add a javadoc comment in the code that at what situation we should and we should not use this flag
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, the default behavior is false, so I didn’t want to change it as it might be assumed in other parts of the code.
Sure — this flag controls whether the WALPlayer job should throw an exception when it encounters an empty file that it can't parse as a valid WAL file, or whether it should skip it silently.