-
Notifications
You must be signed in to change notification settings - Fork 15k
KAFKA-16637: AsyncKafkaConsumer removes offset fetch responses from cache too aggressively #16241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b030479
3a4ad22
5b1473b
3741802
5aac85e
21c0572
7ee84f3
83dff8e
74ef706
7427b97
9c5b98d
a468c1c
e248ff0
04a8e69
9c537d2
7680893
b52f21a
18d75ba
1e3ef7d
fe2a126
f3be9e2
2a2a7ee
a9b110e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
|
|
||
| import org.apache.kafka.clients.ClientResponse; | ||
| import org.apache.kafka.clients.consumer.CommitFailedException; | ||
| import org.apache.kafka.clients.consumer.Consumer; | ||
| import org.apache.kafka.clients.consumer.ConsumerConfig; | ||
| import org.apache.kafka.clients.consumer.OffsetAndMetadata; | ||
| import org.apache.kafka.clients.consumer.RetriableCommitFailedException; | ||
|
|
@@ -47,6 +48,7 @@ | |
| import org.apache.kafka.common.utils.Timer; | ||
| import org.slf4j.Logger; | ||
|
|
||
| import java.time.Duration; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
|
|
@@ -61,6 +63,7 @@ | |
| import java.util.Queue; | ||
| import java.util.Set; | ||
| import java.util.concurrent.CompletableFuture; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
| import java.util.function.BiConsumer; | ||
| import java.util.stream.Collectors; | ||
|
|
||
|
|
@@ -94,6 +97,7 @@ public class CommitRequestManager implements RequestManager, MemberStateListener | |
| * group anymore. | ||
| */ | ||
| private final MemberInfo memberInfo; | ||
| final OffsetFetchResultCache offsetFetchResultCache; | ||
|
|
||
| public CommitRequestManager( | ||
| final Time time, | ||
|
|
@@ -156,6 +160,7 @@ public CommitRequestManager( | |
| this.memberInfo = new MemberInfo(); | ||
| this.metricsManager = new OffsetCommitMetricsManager(metrics); | ||
| this.offsetCommitCallbackInvoker = offsetCommitCallbackInvoker; | ||
| this.offsetFetchResultCache = new OffsetFetchResultCache(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -519,6 +524,7 @@ private void fetchOffsetsWithRetries(final OffsetFetchRequestState fetchRequest, | |
| log.warn("A duplicated, inflight, request was identified, but unable to find it in the " + | ||
| "outbound buffer:" + fetchRequest); | ||
| } | ||
| offsetFetchResultCache.maybeCache(fetchRequest, res); | ||
| if (error == null) { | ||
| result.complete(res); | ||
| } else { | ||
|
|
@@ -1116,6 +1122,7 @@ boolean hasUnsentRequests() { | |
| OffsetCommitRequestState addOffsetCommitRequest(OffsetCommitRequestState request) { | ||
| log.debug("Enqueuing OffsetCommit request for offsets: {}", request.offsets); | ||
| unsentOffsetCommits.add(request); | ||
| offsetFetchResultCache.clear(); | ||
| return request; | ||
| } | ||
|
|
||
|
|
@@ -1127,6 +1134,9 @@ OffsetCommitRequestState addOffsetCommitRequest(OffsetCommitRequestState request | |
| * upon completion. | ||
| */ | ||
| private CompletableFuture<Map<TopicPartition, OffsetAndMetadata>> addOffsetFetchRequest(final OffsetFetchRequestState request) { | ||
| if (offsetFetchResultCache.maybeCompleteRequest(request)) | ||
| return request.future; | ||
|
|
||
| Optional<OffsetFetchRequestState> dupe = | ||
| unsentOffsetFetches.stream().filter(r -> r.sameRequest(request)).findAny(); | ||
| Optional<OffsetFetchRequestState> inflight = | ||
|
|
@@ -1279,4 +1289,123 @@ static class MemberInfo { | |
| this.memberEpoch = Optional.empty(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is now crystal clear. Thanks for putting in the extra work. |
||
| * As the name suggests, this caches the result of an {@link OffsetFetchRequestState offset fetch request} | ||
| * <em>in specific cases</em>. The logic resembles that of {@link ConsumerCoordinator}'s | ||
| * <code>PendingCommittedOffsetRequest</code> mechanism. | ||
| * | ||
| * <p/> | ||
| * | ||
| * This handles the case where a user calls {@link Consumer#poll(Duration)} with a low timeout value, such as 0. | ||
| * As the timeout value approaches zero, the likelihood that the client will be able to fetch the offsets from the | ||
| * broker within the user's timeout also decreases. But we can take advantage of the fact that {@code poll()} | ||
| * is typically invoked in a tight loop, so the user's application will likely call {@code poll()} again to make | ||
| * a second attempt with the same set of {@link TopicPartition}s as the first attempt. | ||
| * | ||
| * <p/> | ||
| * | ||
| * The idea here is to cache the results of the last successful--but expired--response. An operation may exceed | ||
| * the time the user allotted, but that doesn't mean that the network request is aborted. In this scenario, it is | ||
| * often the case that the client receives a successful response, though it has technically exceeded the amount | ||
| * of time the user specified. However, as mentioned above, {@code poll()} is likely to be invoked again with the | ||
| * same set of partitions. By caching the successful response from attempt #1--though it timed out from the user's | ||
| * perspective--the client is able to satisfy attempt #2 from the cache. | ||
|
Comment on lines
+1308
to
+1313
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the expected behavior if there is a commit between the two calls to poll()?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a very good point. If I get it right, the legacy logic does not cache the fetch responses. It caches only the fetch request for which it hasn't received a response yet, with the future response (that can be safely done if the set of partitions requested are the same). As soon as it gets a response it clears the cached request (here). We should never cache the fetch responses because it could definitely lead to the gap that @cadonna is pointing out, if a commit happens in between.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lianetm in the legacy consumer, does the fact that the offset fetch request and the offset commit request use the same network connection ensure that the requests have an order and the commit request cannot overtake the fetch request if commit was sent after fetch? Sorry if this question seems dumb, but I am not familiar with the network part.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe so, but mainly because I expect that the broker will handle them one at a time, so it will receive a fetch first, a commit after, and it won't handle the commit until it's done with the previous fetch request. So for both the legacy and the new logic, I would say it's all safe until the moment we start caching on the client side. There we have to be very careful not to return cached fetch results (that we may have received before a commit).
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a call to clear the cache whenever a commit (of any sort) is enqueued. |
||
| * | ||
| * <p/> | ||
| * | ||
| * The cached results are only used if the set of {@link TopicPartition}s and the client's member ID and epoch | ||
| * for attempt #2 matches that of attempt #1. | ||
| */ | ||
| class OffsetFetchResultCache { | ||
|
|
||
| private final AtomicReference<OffsetFetchResultDetail> cache = new AtomicReference<>(); | ||
|
|
||
| /** | ||
| * Iff the {@link OffsetFetchRequestState#isExpired() request timed out} <em>and</em> the request was | ||
| * successful, we write the result into the cache using the partition set from the request as the 'key.' | ||
| * Otherwise, we clear the existing cached value because the current result does not need to be cached, and | ||
| * thus there's no need for any previous cached value, either. | ||
| */ | ||
| void maybeCache(final OffsetFetchRequestState request, final Map<TopicPartition, OffsetAndMetadata> result) { | ||
| if (request.isExpired() && result != null) { | ||
| OffsetFetchResultDetail newValue = new OffsetFetchResultDetail(request.requestedPartitions, result); | ||
| log.debug("A new offset fetch result was cached: {}", newValue); | ||
| cache.set(newValue); | ||
| } else { | ||
| clear(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * If there's a cached value present that matches the same partitions of the previous request, | ||
| * {@link CompletableFuture#complete(Object) complete the Future}. In either case, we clear out the | ||
| * cache reference as it's either been used, or is now irrelevant. | ||
| */ | ||
| boolean maybeCompleteRequest(final OffsetFetchRequestState request) { | ||
| OffsetFetchResultDetail oldValue = clear(); | ||
|
|
||
| if (oldValue != null) { | ||
| if (oldValue.sameRequest(request.requestedPartitions)) { | ||
| log.debug("The cached offset fetch response matches the requested partitions ({})", request.requestedPartitions); | ||
| request.future.complete(oldValue.result); | ||
| return true; | ||
| } else { | ||
| log.debug( | ||
| "The requested partitions ({}) do not match the cached offset fetch response's partitions ({}); ignoring cached results", | ||
| request.requestedPartitions, | ||
| oldValue.requestedPartitions | ||
| ); | ||
| return false; | ||
| } | ||
| } else { | ||
| log.debug("There are no cached offset fetch results to attempt to use"); | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Retrieves the internal value of the cache. This is intended for testing, but given that the | ||
| * {@link OffsetFetchResultDetail} is immutable, it shouldn't hurt to allow access to this. | ||
| */ | ||
| OffsetFetchResultDetail get() { | ||
| return cache.get(); | ||
| } | ||
|
|
||
| OffsetFetchResultDetail clear() { | ||
| return cache.getAndSet(null); | ||
| } | ||
| } | ||
|
|
||
| class OffsetFetchResultDetail { | ||
|
|
||
| final Set<TopicPartition> requestedPartitions; | ||
| final Optional<String> requestedMemberId; | ||
| final Optional<Integer> requestedMemberEpoch; | ||
| final Map<TopicPartition, OffsetAndMetadata> result; | ||
|
|
||
| OffsetFetchResultDetail(final Set<TopicPartition> partitions, | ||
| final Map<TopicPartition, OffsetAndMetadata> result) { | ||
| this.requestedPartitions = Collections.unmodifiableSet(partitions); | ||
| this.requestedMemberId = memberInfo.memberId; | ||
| this.requestedMemberEpoch = memberInfo.memberEpoch; | ||
| this.result = Collections.unmodifiableMap(result); | ||
| } | ||
|
|
||
| boolean sameRequest(final Set<TopicPartition> currentPartitions) { | ||
| return Objects.equals(requestedPartitions, currentPartitions) && | ||
| Objects.equals(requestedMemberId, memberInfo.memberId) && | ||
| Objects.equals(requestedMemberEpoch, memberInfo.memberEpoch); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "OffsetFetchResultDetail{" + | ||
| "requestedPartitions=" + requestedPartitions + | ||
| ", requestedMemberId=" + requestedMemberId + | ||
| ", requestedMemberEpoch=" + requestedMemberEpoch + | ||
| ", result=" + result + | ||
| '}'; | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this here means that we're caching results instead of the inflight request, which I'm worried could be risky. Let's review this sequence, I may be missing something:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added a unit test for this case. Without the commit cache clearing it fails, but with it, it passes.