Skip to content

Commit bce08f7

Browse files
authored
[automatic failover] Improve failover resilience by evaluating circuit breaker before retries (#4178)
* fix: Change evaluation order of circuit breaker and retry mechanisms This commit modifies the order in which circuit breaker and retry mechanisms are evaluated during failover scenarios. Previously, the circuit breaker was applied after all retries were exhausted, which could delay failover in unstable network conditions. Now, the circuit breaker counts each connection error separately, even during retry attempts. * format * format * use java-8 compatible formater version * fix AutomaticFailoverTest * formating
1 parent ee2f87f commit bce08f7

File tree

5 files changed

+79
-18
lines changed

5 files changed

+79
-18
lines changed

formatter-pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
<plugin>
2121
<groupId>net.revelc.code.formatter</groupId>
2222
<artifactId>formatter-maven-plugin</artifactId>
23-
<version>2.23.0</version>
23+
<version>2.16.0</version>
2424
<configuration>
2525
<configFile>${project.basedir}/hbase-formatter.xml</configFile>
2626
</configuration>

pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@
335335
<directories>
336336
<directory>${project.basedir}/src/main/java/redis/clients/jedis/annots</directory>
337337
<directory>${project.basedir}/src/main/java/redis/clients/jedis/mcf</directory>
338+
<directory>${project.basedir}/src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java</directory>
338339
<directory>${project.basedir}/src/test/java/redis/clients/jedis/failover</directory>
339340
</directories>
340341
</configuration>

src/main/java/redis/clients/jedis/mcf/CircuitBreakerCommandExecutor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ public <T> T executeCommand(CommandObject<T> commandObject) {
3535
DecorateSupplier<T> supplier = Decorators
3636
.ofSupplier(() -> this.handleExecuteCommand(commandObject, cluster));
3737

38-
supplier.withRetry(cluster.getRetry());
3938
supplier.withCircuitBreaker(cluster.getCircuitBreaker());
39+
supplier.withRetry(cluster.getRetry());
4040
supplier.withFallback(provider.getFallbackExceptionList(),
4141
e -> this.handleClusterFailover(commandObject, cluster.getCircuitBreaker()));
4242

src/test/java/redis/clients/jedis/failover/FailoverIntegrationTest.java

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,4 +272,62 @@ public void testManualFailoverInflightCommandsWithErrorsPropagateError() throws
272272
assertThat(getNodeId(failoverClient.info("server")), equalTo(JEDIS2_ID));
273273
}
274274

275-
}
275+
/**
276+
* Tests that the CircuitBreaker counts each command error separately, and not just after all
277+
* retries are exhausted. This ensures that the circuit breaker opens based on the actual number
278+
* of send commands with failures, and not based on the number of logical operations.
279+
*/
280+
@Test
281+
public void testCircuitBreakerCountsEachConnectionErrorSeparately() throws IOException {
282+
MultiClusterClientConfig failoverConfig = new MultiClusterClientConfig.Builder(
283+
getClusterConfigs(
284+
DefaultJedisClientConfig.builder()
285+
.socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS)
286+
.connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build(),
287+
endpoint1, endpoint2)).retryMaxAttempts(2).retryWaitDuration(1)
288+
.circuitBreakerSlidingWindowType(COUNT_BASED).circuitBreakerSlidingWindowSize(3)
289+
.circuitBreakerFailureRateThreshold(50) // 50% failure
290+
// rate threshold
291+
.circuitBreakerSlidingWindowMinCalls(3).build();
292+
293+
MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider(
294+
failoverConfig);
295+
try (UnifiedJedis client = new UnifiedJedis(provider)) {
296+
// Verify initial connection to first endpoint
297+
assertThat(getNodeId(client.info("server")), equalTo(JEDIS1_ID));
298+
299+
// Disable first endpoint
300+
redisProxy1.disable();
301+
302+
// First command should fail and OPEN the circuit breaker immediately
303+
//
304+
// If CB is applied after retries:
305+
// - It would take 2 commands to OPEN CB (error is propagated for both commands)
306+
// - Failover to the next Endpoint happens on the 3rd command
307+
//
308+
// If CB is applied before retries:
309+
// - It should open after just 1 command with retries
310+
// - CB is OPEN after the 2nd retry of the first command
311+
// - Failover to the next Endpoint happens on the 2nd command
312+
//
313+
// This test verifies the second case by checking that:
314+
// 1. CB opens after the first command (with retries)
315+
// 2. The second command is routed to the second endpoint
316+
// Command 1
317+
assertThrows(JedisConnectionException.class, () -> client.info("server"));
318+
319+
// Circuit breaker should be open after just one command with retries
320+
assertThat(provider.getCluster(1).getCircuitBreaker().getState(),
321+
equalTo(CircuitBreaker.State.OPEN));
322+
323+
// Next command should be routed to the second endpoint
324+
// Command 2
325+
assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID));
326+
327+
// Command 3
328+
assertThat(getNodeId(client.info("server")), equalTo(JEDIS2_ID));
329+
330+
}
331+
}
332+
333+
}

src/test/java/redis/clients/jedis/misc/AutomaticFailoverTest.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import static org.junit.jupiter.api.Assertions.assertEquals;
2323
import static org.junit.jupiter.api.Assertions.assertFalse;
24+
import static org.junit.jupiter.api.Assertions.assertThrows;
2425
import static org.junit.jupiter.api.Assertions.assertTrue;
2526

2627
@Tag("failover")
@@ -91,37 +92,38 @@ public void transactionWithSwitch() {
9192

9293
@Test
9394
public void commandFailover() {
94-
int slidingWindowMinCalls = 10;
95-
int slidingWindowSize = 10;
95+
int slidingWindowMinCalls = 6;
96+
int slidingWindowSize = 6;
9697

9798
MultiClusterClientConfig.Builder builder = new MultiClusterClientConfig.Builder(
9899
getClusterConfigs(clientConfig, hostPortWithFailure, workingEndpoint.getHostAndPort()))
100+
.retryMaxAttempts(3) // Default is 3
99101
.circuitBreakerSlidingWindowMinCalls(slidingWindowMinCalls)
100102
.circuitBreakerSlidingWindowSize(slidingWindowSize);
101103

102104
RedisFailoverReporter failoverReporter = new RedisFailoverReporter();
103-
MultiClusterPooledConnectionProvider cacheProvider = new MultiClusterPooledConnectionProvider(builder.build());
104-
cacheProvider.setClusterFailoverPostProcessor(failoverReporter);
105+
MultiClusterPooledConnectionProvider connectionProvider = new MultiClusterPooledConnectionProvider(builder.build());
106+
connectionProvider.setClusterFailoverPostProcessor(failoverReporter);
105107

106-
UnifiedJedis jedis = new UnifiedJedis(cacheProvider);
108+
UnifiedJedis jedis = new UnifiedJedis(connectionProvider);
107109

108110
String key = "hash-" + System.nanoTime();
109111
log.info("Starting calls to Redis");
110112
assertFalse(failoverReporter.failedOver);
111-
for (int attempt = 0; attempt < 10; attempt++) {
112-
try {
113-
jedis.hset(key, "f1", "v1");
114-
} catch (JedisConnectionException jce) {
115-
//
116-
}
117-
assertFalse(failoverReporter.failedOver);
118-
}
113+
// First call fails - will be retried 3 times
114+
// this will increase the CircuitBreaker failure count to 3
115+
assertThrows(JedisConnectionException.class, () -> jedis.hset(key, "c1", "v1"));
119116

117+
// Second call fails - will be retried 3 times
118+
// this will increase the CircuitBreaker failure count to 6
120119
// should failover now
121-
jedis.hset(key, "f1", "v1");
120+
assertThrows(JedisConnectionException.class, () ->jedis.hset(key, "c2", "v1"));
121+
122+
// CB is in OPEN state now, next call should cause failover
123+
assertEquals(1L, jedis.hset(key, "c3", "v1"));
122124
assertTrue(failoverReporter.failedOver);
123125

124-
assertEquals(Collections.singletonMap("f1", "v1"), jedis.hgetAll(key));
126+
assertEquals(Collections.singletonMap("c3", "v1"), jedis.hgetAll(key));
125127
jedis.flushAll();
126128

127129
jedis.close();

0 commit comments

Comments
 (0)