Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replacing Coordinator Queue With Deque & Fixing Usage Of toMap Util #950

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD 2-Clause License. See the LICENSE file in the project root for license information.
* See the NOTICE file in the project root for additional information regarding copyright ownership.
*/
package com.linkedin.datastream.server;

import java.util.Properties;

/**
* Callable Coordinator is used for overriding coordinator behaviors for tests
*/
public interface CallableCoordinatorForTest {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this interface? Seems like TestCoordinator.java has all you need.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't want to add another constructor to TestCoordinator.java to override another method of the coordinator. With this interface, we can minimize code duplication and pass the coordinator overrides as an argument.

For the test "testLeaderDoAssignmentForNewlyElectedLeaderFailurePath", I overrode performPreAssignmentCleanup method to test a failure path, where I am using this.

/**
 * Invokes a constructor of {@link Coordinator} with the given parameters.
 *
 * @param cachedDatastreamReader reader that maintains all the datastreams in the cluster
 * @param properties properties to use while creating the coordinator
 * @return the newly constructed {@code Coordinator} (tests may return an anonymous
 *         subclass that overrides selected coordinator behaviors)
 */
Coordinator invoke(CachedDatastreamReader cachedDatastreamReader, Properties properties);
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.lang.reflect.Method;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
Expand All @@ -21,6 +22,7 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Queue;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
Expand Down Expand Up @@ -136,7 +138,12 @@ private Coordinator createCoordinator(String zkAddr, String cluster) throws Exce
}

/**
 * Creates a {@link Coordinator} for the given ZooKeeper address and cluster using the
 * default {@link DummyTransportProviderAdminFactory} and the default
 * {@code Coordinator} constructor.
 *
 * @param zkAddr ZooKeeper connection string
 * @param cluster name of the test cluster
 * @param override extra properties overriding the defaults
 * @return the started-but-not-yet-running coordinator instance
 * @throws Exception if coordinator construction fails
 */
private Coordinator createCoordinator(String zkAddr, String cluster, Properties override) throws Exception {
  // Delegate to the most general overload; Coordinator::new means "no behavior overrides".
  return createCoordinator(zkAddr, cluster, override, new DummyTransportProviderAdminFactory(), Coordinator::new);
}

/**
 * Creates a {@link Coordinator} with a caller-supplied transport provider admin factory,
 * using the default {@code Coordinator} constructor (no behavior overrides).
 *
 * @param zkAddr ZooKeeper connection string
 * @param cluster name of the test cluster
 * @param override extra properties overriding the defaults
 * @param transportProviderAdminFactory factory for the transport provider admin to register
 * @throws Exception if coordinator construction fails
 */
private Coordinator createCoordinator(String zkAddr, String cluster, Properties override,
TransportProviderAdminFactory transportProviderAdminFactory) throws Exception {
return createCoordinator(zkAddr, cluster, override, transportProviderAdminFactory, Coordinator::new);
}

private Coordinator createCoordinator(String zkAddr, String cluster, Properties override,
Expand All @@ -163,7 +170,7 @@ protected synchronized void handleEvent(CoordinatorEvent event) {
}

private Coordinator createCoordinator(String zkAddr, String cluster, Properties override,
TransportProviderAdminFactory transportProviderAdminFactory) throws Exception {
TransportProviderAdminFactory transportProviderAdminFactory, CallableCoordinatorForTest callableCoordinatorForTest) throws Exception {
Properties props = new Properties();
props.put(CoordinatorConfig.CONFIG_CLUSTER, cluster);
props.put(CoordinatorConfig.CONFIG_ZK_ADDRESS, zkAddr);
Expand All @@ -172,7 +179,7 @@ private Coordinator createCoordinator(String zkAddr, String cluster, Properties
props.putAll(override);
ZkClient client = new ZkClient(zkAddr);
_cachedDatastreamReader = new CachedDatastreamReader(client, cluster);
Coordinator coordinator = new Coordinator(_cachedDatastreamReader, props);
Coordinator coordinator = callableCoordinatorForTest.invoke(_cachedDatastreamReader, props);
coordinator.addTransportProvider(DummyTransportProviderAdminFactory.PROVIDER_NAME,
transportProviderAdminFactory.createTransportProviderAdmin(DummyTransportProviderAdminFactory.PROVIDER_NAME,
new Properties()));
Expand Down Expand Up @@ -3945,6 +3952,60 @@ public void testThroughputViolatingTopicsHandlingForSingleDatastreamOnCreateWith
coordinator.getDatastreamCache().getZkclient().close();
}

@Test
public void testLeaderDoAssignmentForNewlyElectedLeaderFailurePath() throws Exception {
String testCluster = "testLeaderDoAssignmentForNewlyElectedLeaderFailurePath";
String connectorType = "connectorType";
String streamName = "testLeaderDoAssignmentForNewlyElectedLeaderFailurePath";

// Shadow copy of every event the coordinator handles, recorded in handling order
// by the handleEvent override below. Inspected after the coordinator is stopped.
Queue<CoordinatorEvent> shadowCoordinatorQueue = new ArrayDeque<>();
Properties properties = new Properties();
Coordinator coordinator =
createCoordinator(_zkConnectionString, testCluster, properties, new DummyTransportProviderAdminFactory(),
(cachedDatastreamReader, props) -> new Coordinator(cachedDatastreamReader, props) {

// This override generates an exception while the newly elected leader performs pre-assignment cleanup.
// The exception causes the handleLeaderDoAssignment handler to exit, along with inserting the same event
// in the queue for a reattempt.
@Override
protected void performPreAssignmentCleanup(List<DatastreamGroup> datastreamGroups) {
throw new RuntimeException("testing exception path in assignment cleanup routine");
}

// This override collects the coordinator queue events in a shadow queue for test purposes.
@Override
protected synchronized void handleEvent(CoordinatorEvent event) {
shadowCoordinatorQueue.add(event);
super.handleEvent(event);
}
});
TestHookConnector dummyConnector = new TestHookConnector("dummyConnector", connectorType);
coordinator.addConnector(connectorType, dummyConnector, new BroadcastStrategy(Optional.empty()), false,
new SourceBasedDeduper(), null);
coordinator.start();

ZkClient zkClient = new ZkClient(_zkConnectionString);

// Return value is unused; the call's side effect is persisting the datastream in ZK,
// which triggers the (failing) leader assignment path under test.
Datastream testDatastream =
DatastreamTestUtils.createAndStoreDatastreams(zkClient, testCluster, connectorType, streamName)[0];

coordinator.stop();
zkClient.close();
coordinator.getDatastreamCache().getZkclient().close();

// This is the event which should be added to the front of the queue once the handler exits on an exception.
CoordinatorEvent leaderDoAssignmentForNewlyElectedLeader =
new CoordinatorEvent(CoordinatorEvent.EventType.LEADER_DO_ASSIGNMENT, true);

// Drain the shadow queue until the newly elected leader performs the handleLeaderDoAssignment
// request for the first time.
// NOTE(review): this busy-waits on an already-collected queue; if the expected event was never
// handled the loop polls an empty queue forever — consider a bounded wait.
while (!Objects.equals(shadowCoordinatorQueue.peek(), leaderDoAssignmentForNewlyElectedLeader)) {
shadowCoordinatorQueue.poll();
}

// As we expect the reattempt event to be added to the front of the queue, the next handled
// event should be the same LEADER_DO_ASSIGNMENT (newly-elected-leader) event.
Assert.assertEquals(shadowCoordinatorQueue.poll(), leaderDoAssignmentForNewlyElectedLeader);
}
ehoner marked this conversation as resolved.
Show resolved Hide resolved

// This helper function helps compare the requesting topics with the topics reflected in the server.
private BooleanSupplier validateIfViolatingTopicsAreReflectedInServer(Datastream testStream, Coordinator coordinator,
Set<String> requestedThroughputViolatingTopics) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,8 @@ private void handleAssignmentChange(boolean isDatastreamUpdate) throws TimeoutEx
_assignedDatastreamTasks.putAll(currentAssignment.values()
.stream()
.flatMap(Collection::stream)
.collect(Collectors.toMap(DatastreamTask::getDatastreamTaskName, Function.identity())));
.collect(Collectors.toMap(DatastreamTask::getDatastreamTaskName, Function.identity(),
(existingTask, duplicateTask) -> existingTask)));
List<DatastreamTask> newAssignment = new ArrayList<>(_assignedDatastreamTasks.values());

if ((totalTasks - submittedTasks) > 0) {
Expand Down Expand Up @@ -1524,10 +1525,11 @@ private void scheduleLeaderDoAssignmentRetry(boolean isNewlyElectedLeader) {
_log.info("Schedule retry for leader assigning tasks");
_metrics.updateKeyedMeter(CoordinatorMetrics.KeyedMeter.HANDLE_LEADER_DO_ASSIGNMENT_NUM_RETRIES, 1);
_leaderDoAssignmentScheduled.set(true);
// scheduling LEADER_DO_ASSIGNMENT event instantly to prevent any other event being handled before the reattempt.
_leaderDoAssignmentScheduledFuture = _scheduledExecutor.schedule(() -> {
_eventQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(isNewlyElectedLeader));
_eventQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(isNewlyElectedLeader), false);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the main change in this PR. When a newly elected leader does assignment, it schedules the assignment event with task cleanup in front of the queue, to make sure it gets executed before anything else. Just to confirm my understanding.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct! We just need to make sure that nothing else gets handled apart from a successful handling of "LEADER_DO_ASSIGNMENT" (with newly elected leader flag enabled) for a newly elected leader.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isNewlyElectedLeader this will always be true, right? or when do we have that false?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isNewlyElectedLeader is only true when a new leader is elected. In many other cases we have to ask the leader to do assignments, and at those calls, isNewlyElectedLeader will be false.

_leaderDoAssignmentScheduled.set(false);
}, _config.getRetryIntervalMs(), TimeUnit.MILLISECONDS);
}, 0, TimeUnit.MILLISECONDS);
}

@VisibleForTesting
Expand Down Expand Up @@ -1614,7 +1616,7 @@ private void revokeUnclaimedAssignmentTokens(Map<String, List<AssignmentToken>>
}
}

private void performPreAssignmentCleanup(List<DatastreamGroup> datastreamGroups) {
protected void performPreAssignmentCleanup(List<DatastreamGroup> datastreamGroups) {

// Map between instance to tasks assigned to the instance.
Map<String, Set<DatastreamTask>> previousAssignmentByInstance = _adapter.getAllAssignedDatastreamTasks();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ private CoordinatorEvent(EventType eventType) {
_eventMetadata = null;
}

private CoordinatorEvent(EventType eventType, Object eventMetadata) {
/**
 * Constructs a coordinator event with the given type and metadata.
 * Protected (rather than private) so tests can construct events directly,
 * e.g. to compare against events observed in the coordinator queue.
 *
 * @param eventType type of the coordinator event
 * @param eventMetadata metadata associated with the event
 */
protected CoordinatorEvent(EventType eventType, Object eventMetadata) {
_eventType = eventType;
_eventMetadata = eventMetadata;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.LinkedBlockingDeque;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -44,7 +44,7 @@ class CoordinatorEventBlockingQueue implements MetricsAware {
static final String GAUGE_KEY = "queuedEvents";

private final Set<CoordinatorEvent> _eventSet;
private final Queue<CoordinatorEvent> _eventQueue;
private final Deque<CoordinatorEvent> _eventQueue;
private final DynamicMetricsManager _dynamicMetricsManager;
private final Gauge<Integer> _gauge;
private final Counter _counter;
Expand All @@ -59,7 +59,7 @@ class CoordinatorEventBlockingQueue implements MetricsAware {
*/
CoordinatorEventBlockingQueue(String key) {
_eventSet = new HashSet<>();
_eventQueue = new LinkedBlockingQueue<>();
_eventQueue = new LinkedBlockingDeque<>();
_dynamicMetricsManager = DynamicMetricsManager.getInstance();

String prefix = buildMetricName(key);
Expand All @@ -73,16 +73,30 @@ class CoordinatorEventBlockingQueue implements MetricsAware {


/**
* Add a single event to the queue, overwriting events with the same name and same metadata.
* Add a single event to the queue. Defaults to adding the event at the end of the queue.
* @param event CoordinatorEvent event to add to the queue
*/
public synchronized void put(CoordinatorEvent event) {
LOG.info("Queuing event {} to event queue", event.getType());
put(event, true);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When do we need support for inserting at the rear?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the events to this coordinator queue are inserted in the rear. The only case in which we have to insert in the front is what the PR proposes.

}

/**
* Add a single event to the queue, de-duping events with the same name and same metadata.
* @param event CoordinatorEvent event to add to the queue
* @param insertInTheEnd if true, the event is added to the end of the queue; otherwise, it is added to the front.
*/
public synchronized void put(CoordinatorEvent event, boolean insertInTheEnd) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the public APIs be similar to a Deque? I think that put(<event>) and putFirst(<event>) are clearer than using a boolean flag. The boolean option is fine for the private internal implementation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For understanding: LinkedBlockingDeque is already thread-safe, and we are using only offer or offerFirst — what is the rationale for having this method synchronized?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Minor]: another thing is, we want to allow adds at both ends, but removal should happen only from the front. However, by using a deque we allow add/remove at both ends. Is there a way we can restrict removal from the rear?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The synchronized methods were added long before but I am guessing that the shared set variable might create race and would let in duplicate events in some cases if those methods are not synchronized.

That is why the wrapper on top of the LinkedBlockingDeque is implemented which only supports taking out events from the front.

LOG.info("Queuing event {} at the " + (insertInTheEnd ? "end" : "front") + " of the event queue", event.getType());
if (_eventSet.contains(event)) {
_counter.inc(); // count duplicate event
} else {
// only insert if there isn't an event present in the queue with the same name and same metadata.
boolean result = _eventQueue.offer(event);
boolean result;
if (insertInTheEnd) {
result = _eventQueue.offer(event);
} else {
result = _eventQueue.offerFirst(event);
}
if (!result) {
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,8 @@ public Map<String, List<DatastreamTask>> getTasksToCleanUp(List<DatastreamGroup>
Map<String, DatastreamTask> assignmentsMap = currentAssignment.values()
.stream()
.flatMap(Collection::stream)
.collect(Collectors.toMap(DatastreamTask::getDatastreamTaskName, Function.identity()));
.collect(Collectors.toMap(DatastreamTask::getDatastreamTaskName, Function.identity(),
(existingTask, duplicateTask) -> existingTask));
ehoner marked this conversation as resolved.
Show resolved Hide resolved

for (String instance : currentAssignment.keySet()) {
// find the dependency tasks which also exist in the assignmentsMap.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -759,10 +759,11 @@ public void updateAllAssignmentsAndIssueTokens(Map<String, List<DatastreamTask>>
private Map<DatastreamGroup, Set<String>> getStoppingDatastreamGroupInstances(
List<DatastreamGroup> stoppingDatastreamGroups) {
Map<String, Set<DatastreamTask>> currentAssignment = getAllAssignedDatastreamTasks();
Set<String> stoppingDatastreamTaskPrefixes = stoppingDatastreamGroups.stream().
map(DatastreamGroup::getTaskPrefix).collect(toSet());
Map<String, DatastreamGroup> taskPrefixDatastreamGroups = stoppingDatastreamGroups.stream().
collect(Collectors.toMap(DatastreamGroup::getTaskPrefix, Function.identity()));
Set<String> stoppingDatastreamTaskPrefixes =
stoppingDatastreamGroups.stream().map(DatastreamGroup::getTaskPrefix).collect(toSet());
Map<String, DatastreamGroup> taskPrefixDatastreamGroups = stoppingDatastreamGroups.stream()
.collect(Collectors.toMap(DatastreamGroup::getTaskPrefix, Function.identity(),
(existingDatastreamGroup, duplicateDatastreamGroup) -> existingDatastreamGroup));

Map<DatastreamGroup, Set<String>> stoppingDgInstances = new HashMap<>();
currentAssignment.keySet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,20 @@ public void resetMetrics() {
public void testHappyPath() throws Exception {
CoordinatorEventBlockingQueue eventBlockingQueue = new CoordinatorEventBlockingQueue(SIMPLE_NAME);
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(false));
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(true));
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(false));
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(true));
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(true), false);
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(false), true);
eventBlockingQueue.put(CoordinatorEvent.createLeaderDoAssignmentEvent(true), false);
eventBlockingQueue.put(CoordinatorEvent.createLeaderPartitionAssignmentEvent("test1"));
eventBlockingQueue.put(CoordinatorEvent.createLeaderPartitionAssignmentEvent("test1"));
eventBlockingQueue.put(CoordinatorEvent.createLeaderPartitionAssignmentEvent("test2"));
eventBlockingQueue.put(CoordinatorEvent.createLeaderPartitionAssignmentEvent("test2"), false);
eventBlockingQueue.put(CoordinatorEvent.HANDLE_ASSIGNMENT_CHANGE_EVENT);
eventBlockingQueue.put(CoordinatorEvent.HANDLE_ASSIGNMENT_CHANGE_EVENT);
eventBlockingQueue.put(CoordinatorEvent.HANDLE_ASSIGNMENT_CHANGE_EVENT);
Assert.assertEquals(eventBlockingQueue.size(), 5);
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderDoAssignmentEvent(false));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderPartitionAssignmentEvent("test2"));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderDoAssignmentEvent(true));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderDoAssignmentEvent(false));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderPartitionAssignmentEvent("test1"));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.createLeaderPartitionAssignmentEvent("test2"));
Assert.assertEquals(eventBlockingQueue.take(), CoordinatorEvent.HANDLE_ASSIGNMENT_CHANGE_EVENT);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,39 @@ public void testExpectedNumberOfTasks() {
Assert.assertEquals(-1, getNumTasksForDatastreamFromZK(ds.get(0).getName()));
}

@Test
public void testTasksCleanUpWithDuplicatesAcrossInstances() {
StickyPartitionAssignmentStrategy strategy =
createStickyPartitionAssignmentStrategy(3, 90, true, getZkClient(true), _clusterName);
List<DatastreamGroup> datastreams = generateDatastreams("testTasksCleanUpWithDuplicatesAcrossInstances", 1, 3);

Map<String, Set<DatastreamTask>> assignment = generateEmptyAssignment(datastreams, 2, 3, true);

List<String> newPartitions = ImmutableList.of("t-0", "t-1", "tt-0", "tt-1", "ttt-0", "ttt-1", "ttt-2");
assignment =
strategy.assignPartitions(assignment, new DatastreamGroupPartitionsMetadata(datastreams.get(0), newPartitions));

// This following snippet demonstrates a previous leader performing some task movements but got interrupted, OOMed
// or hit session expiry.
// The previous leader was able to add some tasks to the newer instance's assignment, but couldn't remove them from
// previous instance's assignment.
// The next leader should be able to identify and cleanup, even though there'll be duplicate tasks across instances.
DatastreamTask previousTask = null;
for (String instance : assignment.keySet()) {
if (previousTask != null) {
// Copy one task from the previously visited instance into this one, creating a
// cross-instance duplicate without removing the original.
assignment.get(instance).add(previousTask);
}
previousTask = assignment.get(instance).iterator().next();
}

try {
// With the duplicate-merging toMap fix, duplicate task names across instances must not
// throw, and no tasks should be flagged for cleanup in this scenario.
Map<String, List<DatastreamTask>> taskToCleanup = strategy.getTasksToCleanUp(datastreams, assignment);
Assert.assertEquals(taskToCleanup.size(), 0);
} catch (Exception exception) {
Assert.fail("Received exception while finding tasks to be cleaned up", exception.getCause());
}
}

private int getNumTasksForDatastreamFromZK(String taskPrefix) {
String numTasksPath = KeyBuilder.datastreamNumTasks(_clusterName, taskPrefix);
if (!_zkClient.exists(numTasksPath)) {
Expand Down