-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the potential race condition in the BlobStore readhandler #12123
Changes from all commits
45716c7
ac62b65
1765c27
bd8a048
09c0598
f4b4b98
239413f
780215f
e9b1ce2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.ScheduledExecutorService; | ||
|
||
import org.apache.bookkeeper.client.BKException; | ||
import org.apache.bookkeeper.client.api.LastConfirmedAndEntry; | ||
import org.apache.bookkeeper.client.api.LedgerEntries; | ||
|
@@ -54,6 +55,13 @@ public class BlobStoreBackedReadHandleImpl implements ReadHandle { | |
private final DataInputStream dataStream; | ||
private final ExecutorService executor; | ||
|
||
enum State { | ||
Opened, | ||
Closed | ||
} | ||
|
||
private State state = null; | ||
|
||
private BlobStoreBackedReadHandleImpl(long ledgerId, OffloadIndexBlock index, | ||
BackedInputStream inputStream, | ||
ExecutorService executor) { | ||
|
@@ -62,6 +70,7 @@ private BlobStoreBackedReadHandleImpl(long ledgerId, OffloadIndexBlock index, | |
this.inputStream = inputStream; | ||
this.dataStream = new DataInputStream(inputStream); | ||
this.executor = executor; | ||
state = State.Opened; | ||
} | ||
|
||
@Override | ||
|
@@ -81,6 +90,7 @@ public CompletableFuture<Void> closeAsync() { | |
try { | ||
index.close(); | ||
inputStream.close(); | ||
state = State.Closed; | ||
promise.complete(null); | ||
} catch (IOException t) { | ||
promise.completeExceptionally(t); | ||
|
@@ -94,56 +104,73 @@ public CompletableFuture<LedgerEntries> readAsync(long firstEntry, long lastEntr | |
log.debug("Ledger {}: reading {} - {}", getId(), firstEntry, lastEntry); | ||
CompletableFuture<LedgerEntries> promise = new CompletableFuture<>(); | ||
executor.submit(() -> { | ||
List<LedgerEntry> entries = new ArrayList<LedgerEntry>(); | ||
try { | ||
if (firstEntry > lastEntry | ||
|| firstEntry < 0 | ||
|| lastEntry > getLastAddConfirmed()) { | ||
promise.completeExceptionally(new BKException.BKIncorrectParameterException()); | ||
return; | ||
} | ||
long entriesToRead = (lastEntry - firstEntry) + 1; | ||
List<LedgerEntry> entries = new ArrayList<LedgerEntry>(); | ||
long nextExpectedId = firstEntry; | ||
try { | ||
while (entriesToRead > 0) { | ||
int length = dataStream.readInt(); | ||
if (length < 0) { // hit padding or new block | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} | ||
long entryId = dataStream.readLong(); | ||
|
||
if (entryId == nextExpectedId) { | ||
ByteBuf buf = PulsarByteBufAllocator.DEFAULT.buffer(length, length); | ||
entries.add(LedgerEntryImpl.create(ledgerId, entryId, length, buf)); | ||
int toWrite = length; | ||
while (toWrite > 0) { | ||
toWrite -= buf.writeBytes(dataStream, toWrite); | ||
} | ||
entriesToRead--; | ||
nextExpectedId++; | ||
} else if (entryId > nextExpectedId) { | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} else if (entryId < nextExpectedId | ||
&& !index.getIndexEntryForEntry(nextExpectedId).equals( | ||
index.getIndexEntryForEntry(entryId))) { | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} else if (entryId > lastEntry) { | ||
log.info("Expected to read {}, but read {}, which is greater than last entry {}", | ||
nextExpectedId, entryId, lastEntry); | ||
throw new BKException.BKUnexpectedConditionException(); | ||
} else { | ||
long ignored = inputStream.skip(length); | ||
|
||
// seek the position to the first entry position, otherwise we will get the unexpected entry ID when doing | ||
// the first read, that would cause read an unexpected entry id which is out of range between firstEntry | ||
// and lastEntry | ||
// for example, when we get 1-10 entries at first, then the next request is get 2-9, the following code | ||
// will read the entry id from the stream and that is not the correct entry id, so it will seek to the | ||
// correct position then read the stream as normal. But the entry id may exceed the last entry id, that | ||
// will cause we are hardly to know the edge of the request range. | ||
inputStream.seek(index.getIndexEntryForEntry(firstEntry).getDataOffset()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because we have seek to the firstEntry before reading, we can simplify the following check logic. If the entryId read from dataStream not equal to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seek operation is being used to seeking to the first entry id position, and then do the original logic. This operation is only to make sure the read position is as we wanted. The following check logic is still needed to help us to seek to the right position if we met some unexpected data. Maybe we can set a limitation to avoid it into an infinite loop. |
||
|
||
while (entriesToRead > 0) { | ||
if (state == State.Closed) { | ||
log.warn("Reading a closed read handler. Ledger ID: {}, Read range: {}-{}", ledgerId, firstEntry, lastEntry); | ||
throw new BKException.BKUnexpectedConditionException(); | ||
} | ||
int length = dataStream.readInt(); | ||
if (length < 0) { // hit padding or new block | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} | ||
long entryId = dataStream.readLong(); | ||
|
||
if (entryId == nextExpectedId) { | ||
ByteBuf buf = PulsarByteBufAllocator.DEFAULT.buffer(length, length); | ||
entries.add(LedgerEntryImpl.create(ledgerId, entryId, length, buf)); | ||
int toWrite = length; | ||
while (toWrite > 0) { | ||
toWrite -= buf.writeBytes(dataStream, toWrite); | ||
} | ||
entriesToRead--; | ||
nextExpectedId++; | ||
} else if (entryId > nextExpectedId && entryId < lastEntry) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The two if check are lead to inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset());
continue; can we merge them into one if check? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
log.warn("The read entry {} is not the expected entry {} but in the range of {} - {}," | ||
+ " seeking to the right position", entryId, nextExpectedId, nextExpectedId, lastEntry); | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} else if (entryId < nextExpectedId | ||
&& !index.getIndexEntryForEntry(nextExpectedId).equals(index.getIndexEntryForEntry(entryId))) { | ||
log.warn("Read an unexpected entry id {} which is smaller than the next expected entry id {}" | ||
+ ", seeking to the right position", entries, nextExpectedId); | ||
inputStream.seek(index.getIndexEntryForEntry(nextExpectedId).getDataOffset()); | ||
continue; | ||
} else if (entryId > lastEntry) { | ||
log.info("Expected to read {}, but read {}, which is greater than last entry {}", | ||
nextExpectedId, entryId, lastEntry); | ||
throw new BKException.BKUnexpectedConditionException(); | ||
} else { | ||
long ignore = inputStream.skip(length); | ||
} | ||
|
||
promise.complete(LedgerEntriesImpl.create(entries)); | ||
} catch (Throwable t) { | ||
promise.completeExceptionally(t); | ||
entries.forEach(LedgerEntry::close); | ||
} | ||
}); | ||
|
||
promise.complete(LedgerEntriesImpl.create(entries)); | ||
} catch (Throwable t) { | ||
promise.completeExceptionally(t); | ||
entries.forEach(LedgerEntry::close); | ||
} | ||
}); | ||
return promise; | ||
} | ||
|
||
|
@@ -203,6 +230,12 @@ public static ReadHandle open(ScheduledExecutorService executor, | |
versionCheck, | ||
index.getDataObjectLength(), | ||
readBufferSize); | ||
|
||
return new BlobStoreBackedReadHandleImpl(ledgerId, index, inputStream, executor); | ||
} | ||
|
||
// for testing | ||
State getState() { | ||
return this.state; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'd better put it behind
we can save the list object apply when the (firstEntry, lastEntry) check failed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The array list will not allocate space until there is an element added to it. And it is used in the
catch
, I think we can keep it.