Skip to content

Commit

Permalink
CDPD-68778: HBASE-28637 asyncwal should attempt to recover lease if c…
Browse files Browse the repository at this point in the history
…lose fails (apache#5962) (apache#6033)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
(cherry picked from commit 0e8cfdb)
(cherry picked from commit b881296)

Change-Id: I6ad1d3122a0452d583a7269be9dbc3851f4f3a37
  • Loading branch information
jojochuang authored and sdevineni committed Aug 29, 2024
1 parent 7b424b2 commit b68fecf
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.io.asyncfs.AsyncFSOutput;
import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
import org.apache.hadoop.hbase.wal.AsyncFSWALProvider;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKeyImpl;
Expand Down Expand Up @@ -746,13 +747,22 @@ private void waitForSafePoint() {
}
}

/**
 * Best-effort attempt to recover the lease on file {@code p} by delegating to
 * {@link RecoverLeaseFSUtils#recoverFileLease}.
 * <p>
 * An {@link IOException} raised by the recovery call is logged at ERROR level and swallowed, so
 * this method never propagates a recovery failure to its caller.
 * @param fs   file system handed to the recovery utility
 * @param p    path of the file whose lease should be recovered
 * @param conf configuration handed to the recovery utility
 */
private void recoverLease(FileSystem fs, Path p, Configuration conf) {
  try {
    // The fourth argument is intentionally passed as null.
    RecoverLeaseFSUtils.recoverFileLease(fs, p, conf, null);
  } catch (IOException ex) {
    // Name the file in the message so the failure can be tied to a specific path.
    LOG.error("Unable to recover lease of " + p + " after several attempts. Give up.", ex);
  }
}

private void closeWriter(AsyncWriter writer, Path path) {
inflightWALClosures.put(path.getName(), writer);
closeExecutor.execute(() -> {
try {
writer.close();
} catch (IOException e) {
LOG.warn("close old writer failed", e);
LOG.warn("close old writer failed.", e);
recoverLease(this.fs, path, conf);
} finally {
// call this even if the above close fails, as there is no other chance we can set closed to
// true, it will not cause big problems.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.FSHLogProvider;
import org.apache.hadoop.hbase.wal.WALEdit;
Expand Down Expand Up @@ -458,15 +459,22 @@ private void closeWriter(Writer writer, Path path, boolean syncCloseCall) throws
writer.close();
span.addEvent("writer closed");
} catch (IOException ioe) {
int errors = closeErrorCount.incrementAndGet();
boolean hasUnflushedEntries = isUnflushedEntries();
if (syncCloseCall && (hasUnflushedEntries || (errors > this.closeErrorsTolerated))) {
LOG.error("Close of WAL " + path + " failed. Cause=\"" + ioe.getMessage() + "\", errors="
+ errors + ", hasUnflushedEntries=" + hasUnflushedEntries);
throw ioe;
LOG.warn("close old writer failed.", ioe);
try {
RecoverLeaseFSUtils.recoverFileLease(fs, path, conf, null);
} catch (IOException ex) {
LOG.error("Unable to recover lease after several attempts. Give up.", ex);

int errors = closeErrorCount.incrementAndGet();
boolean hasUnflushedEntries = isUnflushedEntries();
if (syncCloseCall && (hasUnflushedEntries || (errors > this.closeErrorsTolerated))) {
LOG.error("Close of WAL " + path + " failed. Cause=\"" + ioe.getMessage() + "\", errors="
+ errors + ", hasUnflushedEntries=" + hasUnflushedEntries);
throw ioe;
}
LOG.warn("Riding over failed WAL close of " + path
+ "; THIS FILE WAS NOT CLOSED BUT ALL EDITS SYNCED SO SHOULD BE OK", ioe);
}
LOG.warn("Riding over failed WAL close of " + path
+ "; THIS FILE WAS NOT CLOSED BUT ALL EDITS SYNCED SO SHOULD BE OK", ioe);
}
}

Expand Down

0 comments on commit b68fecf

Please sign in to comment.