@@ -80,7 +80,7 @@ public class MultiClusterPooledConnectionProvider implements ConnectionProvider
8080 */
8181 private volatile Cluster activeCluster ;
8282
83- private final Lock activeClusterIndexLock = new ReentrantLock (true );
83+ private final Lock activeClusterChangeLock = new ReentrantLock (true );
8484
8585 /**
8686 * Functional interface for listening to cluster switch events. The event args contain the reason
@@ -183,7 +183,13 @@ public MultiClusterPooledConnectionProvider(MultiClusterClientConfig multiCluste
183183 // Mark initialization as complete - handleHealthStatusChange can now process events
184184 initializationComplete = true ;
185185 if (!activeCluster .isHealthy ()) {
186- activeCluster = waitForInitialHealthyCluster (statusTracker );
186+ // Race condition: Direct assignment to 'activeCluster' is not thread safe because
187+ // 'onHealthStatusChange' may execute concurrently once 'initializationComplete'
188+ // is set to true.
189+ // Simple rule: never assign a value to 'activeCluster' without holding
190+ // 'activeClusterChangeLock' once 'initializationComplete' has been set to true.
191+ waitForInitialHealthyCluster (statusTracker );
192+ iterateActiveCluster (SwitchReason .HEALTH_CHECK );
187193 }
188194 this .fallbackExceptionList = multiClusterClientConfig .getFallbackExceptionList ();
189195
@@ -211,11 +217,11 @@ public void add(ClusterConfig clusterConfig) {
211217 "Endpoint " + endpoint + " already exists in the provider" );
212218 }
213219
214- activeClusterIndexLock .lock ();
220+ activeClusterChangeLock .lock ();
215221 try {
216222 addClusterInternal (multiClusterClientConfig , clusterConfig );
217223 } finally {
218- activeClusterIndexLock .unlock ();
224+ activeClusterChangeLock .unlock ();
219225 }
220226 }
221227
@@ -240,7 +246,7 @@ public void remove(Endpoint endpoint) {
240246 }
241247 log .debug ("Removing endpoint {}" , endpoint );
242248
243- activeClusterIndexLock .lock ();
249+ activeClusterChangeLock .lock ();
244250 try {
245251 Cluster clusterToRemove = multiClusterMap .get (endpoint );
246252 boolean isActiveCluster = (activeCluster == clusterToRemove );
@@ -273,7 +279,7 @@ public void remove(Endpoint endpoint) {
273279 clusterToRemove .close ();
274280 }
275281 } finally {
276- activeClusterIndexLock .unlock ();
282+ activeClusterChangeLock .unlock ();
277283 }
278284 }
279285
@@ -542,7 +548,7 @@ private boolean setActiveCluster(Cluster cluster, boolean validateConnection) {
542548 // Cluster cluster = clusterEntry.getValue();
543549 // Field-level synchronization is used to avoid the edge case in which
544550 // incrementActiveMultiClusterIndex() is called at the same time
545- activeClusterIndexLock .lock ();
551+ activeClusterChangeLock .lock ();
546552 Cluster oldCluster ;
547553 try {
548554
@@ -563,7 +569,7 @@ private boolean setActiveCluster(Cluster cluster, boolean validateConnection) {
563569 oldCluster = activeCluster ;
564570 activeCluster = cluster ;
565571 } finally {
566- activeClusterIndexLock .unlock ();
572+ activeClusterChangeLock .unlock ();
567573 }
568574 boolean switched = oldCluster != cluster ;
569575 if (switched && this .multiClusterClientConfig .isFastFailover ()) {
0 commit comments