Skip to content

Commit 1baa094

Browse files
poorbarcodesrinath-ctds
authored andcommitted
[fix] [ml] Add entry fail due to race condition about add entry failed/timeout and switch ledger (apache#22221)
(cherry picked from commit b798e7f)
1 parent 8027d5c commit 1baa094

File tree

6 files changed

+113
-26
lines changed

6 files changed

+113
-26
lines changed

managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerImpl.java

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import java.util.concurrent.ScheduledFuture;
6060
import java.util.concurrent.TimeUnit;
6161
import java.util.concurrent.TimeoutException;
62+
import java.util.concurrent.atomic.AtomicBoolean;
6263
import java.util.concurrent.atomic.AtomicInteger;
6364
import java.util.concurrent.atomic.AtomicLongFieldUpdater;
6465
import java.util.concurrent.atomic.AtomicReference;
@@ -242,6 +243,9 @@ public class ManagedLedgerImpl implements ManagedLedger, CreateCallback {
242243
protected volatile long lastAddEntryTimeMs = 0;
243244
private long inactiveLedgerRollOverTimeMs = 0;
244245

246+
/** A signal that may trigger all the subsequent OpAddEntry of current ledger to be failed due to timeout. **/
247+
protected volatile AtomicBoolean currentLedgerTimeoutTriggered;
248+
245249
protected static final int DEFAULT_LEDGER_DELETE_RETRIES = 3;
246250
protected static final int DEFAULT_LEDGER_DELETE_BACKOFF_TIME_SEC = 60;
247251
private static final String MIGRATION_STATE_PROPERTY = "migrated";
@@ -534,6 +538,7 @@ public void operationFailed(MetaStoreException e) {
534538
STATE_UPDATER.set(this, State.LedgerOpened);
535539
updateLastLedgerCreatedTimeAndScheduleRolloverTask();
536540
currentLedger = lh;
541+
currentLedgerTimeoutTriggered = new AtomicBoolean();
537542

538543
lastConfirmedEntry = new PositionImpl(lh.getId(), -1);
539544
// bypass empty ledgers, find last ledger with Message if possible.
@@ -776,7 +781,8 @@ public void asyncAddEntry(ByteBuf buffer, AddEntryCallback callback, Object ctx)
776781

777782
// Jump to specific thread to avoid contention from writers writing from different threads
778783
executor.execute(() -> {
779-
OpAddEntry addOperation = OpAddEntry.createNoRetainBuffer(this, buffer, callback, ctx);
784+
OpAddEntry addOperation = OpAddEntry.createNoRetainBuffer(this, buffer, callback, ctx,
785+
currentLedgerTimeoutTriggered);
780786
internalAsyncAddEntry(addOperation);
781787
});
782788
}
@@ -792,7 +798,8 @@ public void asyncAddEntry(ByteBuf buffer, int numberOfMessages, AddEntryCallback
792798

793799
// Jump to specific thread to avoid contention from writers writing from different threads
794800
executor.execute(() -> {
795-
OpAddEntry addOperation = OpAddEntry.createNoRetainBuffer(this, buffer, numberOfMessages, callback, ctx);
801+
OpAddEntry addOperation = OpAddEntry.createNoRetainBuffer(this, buffer, numberOfMessages, callback, ctx,
802+
currentLedgerTimeoutTriggered);
796803
internalAsyncAddEntry(addOperation);
797804
});
798805
}
@@ -844,6 +851,7 @@ protected synchronized void internalAsyncAddEntry(OpAddEntry addOperation) {
844851

845852
// Write into lastLedger
846853
addOperation.setLedger(currentLedger);
854+
addOperation.setTimeoutTriggered(currentLedgerTimeoutTriggered);
847855

848856
++currentLedgerEntries;
849857
currentLedgerSize += addOperation.data.readableBytes();
@@ -1587,6 +1595,7 @@ public void operationComplete(Void v, Stat stat) {
15871595
LedgerHandle originalCurrentLedger = currentLedger;
15881596
ledgers.put(lh.getId(), newLedger);
15891597
currentLedger = lh;
1598+
currentLedgerTimeoutTriggered = new AtomicBoolean();
15901599
currentLedgerEntries = 0;
15911600
currentLedgerSize = 0;
15921601
updateLedgersIdsComplete(originalCurrentLedger);
@@ -1670,9 +1679,11 @@ void createNewOpAddEntryForNewLedger() {
16701679
if (existsOp != null) {
16711680
// If op is used by another ledger handle, we need to close it and create a new one
16721681
if (existsOp.ledger != null) {
1673-
existsOp.close();
1674-
existsOp = OpAddEntry.createNoRetainBuffer(existsOp.ml, existsOp.data,
1675-
existsOp.getNumberOfMessages(), existsOp.callback, existsOp.ctx);
1682+
existsOp = existsOp.duplicateAndClose(currentLedgerTimeoutTriggered);
1683+
} else {
1684+
// This scenario should not happen.
1685+
log.warn("[{}] An OpAddEntry's ledger is empty.", name);
1686+
existsOp.setTimeoutTriggered(currentLedgerTimeoutTriggered);
16761687
}
16771688
existsOp.setLedger(currentLedger);
16781689
pendingAddEntries.add(existsOp);
@@ -4211,13 +4222,14 @@ private void checkAddTimeout() {
42114222
}
42124223
OpAddEntry opAddEntry = pendingAddEntries.peek();
42134224
if (opAddEntry != null) {
4214-
final long finalAddOpCount = opAddEntry.addOpCount;
42154225
boolean isTimedOut = opAddEntry.lastInitTime != -1
42164226
&& TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - opAddEntry.lastInitTime) >= timeoutSec;
42174227
if (isTimedOut) {
4218-
log.error("Failed to add entry for ledger {} in time-out {} sec",
4219-
(opAddEntry.ledger != null ? opAddEntry.ledger.getId() : -1), timeoutSec);
4220-
opAddEntry.handleAddTimeoutFailure(opAddEntry.ledger, finalAddOpCount);
4228+
log.warn("[{}] Failed to add entry {}:{} in time-out {} sec", this.name,
4229+
opAddEntry.ledger != null ? opAddEntry.ledger.getId() : -1,
4230+
opAddEntry.entryId, timeoutSec);
4231+
currentLedgerTimeoutTriggered.set(true);
4232+
opAddEntry.handleAddFailure(opAddEntry.ledger);
42214233
}
42224234
}
42234235
}

managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/OpAddEntry.java

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
import io.netty.util.Recycler.Handle;
2525
import io.netty.util.ReferenceCountUtil;
2626
import java.util.concurrent.TimeUnit;
27+
import java.util.concurrent.atomic.AtomicBoolean;
2728
import java.util.concurrent.atomic.AtomicLongFieldUpdater;
2829
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
30+
import lombok.Setter;
2931
import lombok.extern.slf4j.Slf4j;
3032
import org.apache.bookkeeper.client.AsyncCallback.AddCallback;
3133
import org.apache.bookkeeper.client.AsyncCallback.CloseCallback;
@@ -45,7 +47,7 @@
4547
public class OpAddEntry implements AddCallback, CloseCallback, Runnable {
4648
protected ManagedLedgerImpl ml;
4749
LedgerHandle ledger;
48-
private long entryId;
50+
long entryId;
4951
private int numberOfMessages;
5052

5153
@SuppressWarnings("unused")
@@ -68,6 +70,9 @@ public class OpAddEntry implements AddCallback, CloseCallback, Runnable {
6870
AtomicReferenceFieldUpdater.newUpdater(OpAddEntry.class, OpAddEntry.State.class, "state");
6971
volatile State state;
7072

73+
@Setter
74+
private AtomicBoolean timeoutTriggered;
75+
7176
enum State {
7277
OPEN,
7378
INITIATED,
@@ -76,17 +81,18 @@ enum State {
7681
}
7782

7883
public static OpAddEntry createNoRetainBuffer(ManagedLedgerImpl ml, ByteBuf data, AddEntryCallback callback,
79-
Object ctx) {
80-
OpAddEntry op = createOpAddEntryNoRetainBuffer(ml, data, callback, ctx);
84+
Object ctx, AtomicBoolean timeoutTriggered) {
85+
OpAddEntry op = createOpAddEntryNoRetainBuffer(ml, data, callback, ctx, timeoutTriggered);
8186
if (log.isDebugEnabled()) {
8287
log.debug("Created new OpAddEntry {}", op);
8388
}
8489
return op;
8590
}
8691

8792
public static OpAddEntry createNoRetainBuffer(ManagedLedgerImpl ml, ByteBuf data, int numberOfMessages,
88-
AddEntryCallback callback, Object ctx) {
89-
OpAddEntry op = createOpAddEntryNoRetainBuffer(ml, data, callback, ctx);
93+
AddEntryCallback callback, Object ctx,
94+
AtomicBoolean timeoutTriggered) {
95+
OpAddEntry op = createOpAddEntryNoRetainBuffer(ml, data, callback, ctx, timeoutTriggered);
9096
op.numberOfMessages = numberOfMessages;
9197
if (log.isDebugEnabled()) {
9298
log.debug("Created new OpAddEntry {}", op);
@@ -95,7 +101,8 @@ public static OpAddEntry createNoRetainBuffer(ManagedLedgerImpl ml, ByteBuf data
95101
}
96102

97103
private static OpAddEntry createOpAddEntryNoRetainBuffer(ManagedLedgerImpl ml, ByteBuf data,
98-
AddEntryCallback callback, Object ctx) {
104+
AddEntryCallback callback, Object ctx,
105+
AtomicBoolean timeoutTriggered) {
99106
OpAddEntry op = RECYCLER.get();
100107
op.ml = ml;
101108
op.ledger = null;
@@ -109,6 +116,7 @@ private static OpAddEntry createOpAddEntryNoRetainBuffer(ManagedLedgerImpl ml, B
109116
op.startTime = System.nanoTime();
110117
op.state = State.OPEN;
111118
op.payloadProcessorHandle = null;
119+
op.timeoutTriggered = timeoutTriggered;
112120
ml.mbean.addAddEntrySample(op.dataLength);
113121
return op;
114122
}
@@ -176,7 +184,9 @@ public void addComplete(int rc, final LedgerHandle lh, long entryId, Object ctx)
176184
if (!STATE_UPDATER.compareAndSet(OpAddEntry.this, State.INITIATED, State.COMPLETED)) {
177185
log.warn("[{}] The add op is terminal legacy callback for entry {}-{} adding.", ml.getName(), lh.getId(),
178186
entryId);
179-
OpAddEntry.this.recycle();
187+
// Since there is a thread is coping this object, do not recycle this object to avoid other problems.
188+
// For example: we recycled this object, other thread get a null "opAddEntry.{variable_name}".
189+
// Recycling is not mandatory, JVM GC will collect it.
180190
return;
181191
}
182192

@@ -200,7 +210,7 @@ public void addComplete(int rc, final LedgerHandle lh, long entryId, Object ctx)
200210
lh == null ? -1 : lh.getId(), entryId, dataLength, rc);
201211
}
202212

203-
if (rc != BKException.Code.OK) {
213+
if (rc != BKException.Code.OK || timeoutTriggered.get()) {
204214
handleAddFailure(lh);
205215
} else {
206216
// Trigger addComplete callback in a thread hashed on the managed ledger name
@@ -307,13 +317,6 @@ private boolean checkAndCompleteOp(Object ctx) {
307317
return false;
308318
}
309319

310-
void handleAddTimeoutFailure(final LedgerHandle ledger, Object ctx) {
311-
if (checkAndCompleteOp(ctx)) {
312-
this.close();
313-
this.handleAddFailure(ledger);
314-
}
315-
}
316-
317320
/**
318321
* It handles add failure on the given ledger. it can be triggered when add-entry fails or times out.
319322
*
@@ -333,8 +336,11 @@ void handleAddFailure(final LedgerHandle lh) {
333336
});
334337
}
335338

336-
void close() {
339+
OpAddEntry duplicateAndClose(AtomicBoolean timeoutTriggered) {
337340
STATE_UPDATER.set(OpAddEntry.this, State.CLOSED);
341+
OpAddEntry duplicate =
342+
OpAddEntry.createNoRetainBuffer(ml, data, getNumberOfMessages(), callback, ctx, timeoutTriggered);
343+
return duplicate;
338344
}
339345

340346
public State getState() {
@@ -389,6 +395,7 @@ public void recycle() {
389395
startTime = -1;
390396
lastInitTime = -1;
391397
payloadProcessorHandle = null;
398+
timeoutTriggered = null;
392399
recyclerHandle.recycle(this);
393400
}
394401

managed-ledger/src/main/java/org/apache/bookkeeper/mledger/impl/ShadowManagedLedgerImpl.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.Map;
2626
import java.util.TreeMap;
2727
import java.util.concurrent.CompletableFuture;
28+
import java.util.concurrent.atomic.AtomicBoolean;
2829
import java.util.function.Supplier;
2930
import lombok.extern.slf4j.Slf4j;
3031
import org.apache.bookkeeper.client.AsyncCallback;
@@ -54,6 +55,8 @@ public ShadowManagedLedgerImpl(ManagedLedgerFactoryImpl factory, BookKeeper book
5455
String name, final Supplier<CompletableFuture<Boolean>> mlOwnershipChecker) {
5556
super(factory, bookKeeper, store, config, scheduledExecutor, name, mlOwnershipChecker);
5657
this.sourceMLName = config.getShadowSourceName();
58+
// ShadowManagedLedgerImpl does not implement add entry timeout yet, so this variable will always be false.
59+
this.currentLedgerTimeoutTriggered = new AtomicBoolean(false);
5760
}
5861

5962
/**

managed-ledger/src/test/java/org/apache/bookkeeper/mledger/impl/ManagedLedgerTest.java

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@
146146
import org.apache.pulsar.metadata.impl.FaultInjectionMetadataStore;
147147
import org.awaitility.Awaitility;
148148
import org.awaitility.reflect.WhiteboxImpl;
149+
import org.eclipse.jetty.util.BlockingArrayQueue;
149150
import org.mockito.Mockito;
150151
import org.testng.Assert;
151152
import org.testng.annotations.DataProvider;
@@ -3184,6 +3185,55 @@ public void readEntriesFailed(ManagedLedgerException exception, Object ctx) {
31843185
ledger.close();
31853186
}
31863187

3188+
@Test
3189+
public void testAddEntryResponseTimeout() throws Exception {
3190+
// Create ML with feature Add Entry Timeout Check.
3191+
final ManagedLedgerConfig config = new ManagedLedgerConfig().setAddEntryTimeoutSeconds(2);
3192+
final ManagedLedgerImpl ledger = (ManagedLedgerImpl) factory.open("ml1", config);
3193+
final ManagedCursor cursor = ledger.openCursor("c1");
3194+
final CollectCtxAddEntryCallback collectCtxAddEntryCallback = new CollectCtxAddEntryCallback();
3195+
3196+
// Insert a response delay.
3197+
bkc.addEntryResponseDelay(8, TimeUnit.SECONDS);
3198+
3199+
// Add two entries.
3200+
final byte[] msg1 = new byte[]{1};
3201+
final byte[] msg2 = new byte[]{2};
3202+
int ctx1 = 1;
3203+
int ctx2 = 2;
3204+
ledger.asyncAddEntry(msg1, collectCtxAddEntryCallback, ctx1);
3205+
ledger.asyncAddEntry(msg2, collectCtxAddEntryCallback, ctx2);
3206+
// Verify all write requests are completed.
3207+
Awaitility.await().untilAsserted(() -> {
3208+
assertEquals(collectCtxAddEntryCallback.addCompleteCtxList, Arrays.asList(1, 2));
3209+
});
3210+
Entry entry1 = cursor.readEntries(1).get(0);
3211+
assertEquals(entry1.getData(), msg1);
3212+
entry1.release();
3213+
Entry entry2 = cursor.readEntries(1).get(0);
3214+
assertEquals(entry2.getData(), msg2);
3215+
entry2.release();
3216+
3217+
// cleanup.
3218+
factory.delete(ledger.name);
3219+
}
3220+
3221+
private static class CollectCtxAddEntryCallback implements AddEntryCallback {
3222+
3223+
public List<Object> addCompleteCtxList = new BlockingArrayQueue<>();
3224+
public List<Object> addFailedCtxList = new BlockingArrayQueue<>();
3225+
3226+
@Override
3227+
public void addComplete(Position position, ByteBuf entryData, Object ctx) {
3228+
addCompleteCtxList.add(ctx);
3229+
}
3230+
3231+
@Override
3232+
public void addFailed(ManagedLedgerException exception, Object ctx) {
3233+
addFailedCtxList.add(ctx);
3234+
}
3235+
}
3236+
31873237
/**
31883238
* It verifies that if bk-client doesn't complete the add-entry in given time out then broker is resilient enough
31893239
* to create new ledger and add entry successfully.
@@ -3259,7 +3309,8 @@ public void avoidUseSameOpAddEntryBetweenDifferentLedger() throws Exception {
32593309

32603310
List<OpAddEntry> oldOps = new ArrayList<>();
32613311
for (int i = 0; i < 10; i++) {
3262-
OpAddEntry op = OpAddEntry.createNoRetainBuffer(ledger, ByteBufAllocator.DEFAULT.buffer(128).retain(), null, null);
3312+
OpAddEntry op = OpAddEntry.createNoRetainBuffer(ledger,
3313+
ByteBufAllocator.DEFAULT.buffer(128).retain(), null, null, new AtomicBoolean());
32633314
if (i > 4) {
32643315
op.setLedger(mock(LedgerHandle.class));
32653316
}

testmocks/src/main/java/org/apache/bookkeeper/client/PulsarMockBookKeeper.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.bookkeeper.client;
2020

21+
import static com.google.common.base.Preconditions.checkArgument;
2122
import com.google.common.collect.Lists;
2223
import java.util.ArrayList;
2324
import java.util.Arrays;
@@ -89,6 +90,7 @@ public static Collection<BookieId> getMockEnsemble() {
8990
}
9091

9192
final Queue<Long> addEntryDelaysMillis = new ConcurrentLinkedQueue<>();
93+
final Queue<Long> addEntryResponseDelaysMillis = new ConcurrentLinkedQueue<>();
9294
final List<CompletableFuture<Void>> failures = new ArrayList<>();
9395
final List<CompletableFuture<Void>> addEntryFailures = new ArrayList<>();
9496

@@ -367,6 +369,11 @@ public synchronized void addEntryDelay(long delay, TimeUnit unit) {
367369
addEntryDelaysMillis.add(unit.toMillis(delay));
368370
}
369371

372+
public synchronized void addEntryResponseDelay(long delay, TimeUnit unit) {
373+
checkArgument(delay >= 0, "The delay time must not be negative.");
374+
addEntryResponseDelaysMillis.add(unit.toMillis(delay));
375+
}
376+
370377
static int getExceptionCode(Throwable t) {
371378
if (t instanceof BKException) {
372379
return ((BKException) t).getCode();

testmocks/src/main/java/org/apache/bookkeeper/client/PulsarMockLedgerHandle.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,13 @@ public void asyncAddEntry(final ByteBuf data, final AddCallback cb, final Object
197197
cb.addComplete(PulsarMockBookKeeper.getExceptionCode(exception),
198198
PulsarMockLedgerHandle.this, LedgerHandle.INVALID_ENTRY_ID, ctx);
199199
} else {
200+
Long responseDelayMillis = bk.addEntryResponseDelaysMillis.poll();
201+
if (responseDelayMillis != null) {
202+
try {
203+
Thread.sleep(responseDelayMillis);
204+
} catch (InterruptedException e) {
205+
}
206+
}
200207
cb.addComplete(BKException.Code.OK, PulsarMockLedgerHandle.this, entryId, ctx);
201208
}
202209
}, bk.executor);

0 commit comments

Comments
 (0)