Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duplicate message on validate offset #4261

Merged
merged 3 commits into from
Apr 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@

librdkafka v2.1.1 is a maintenance release:

* Avoid duplicate messages when a fetch response is received
in the middle of an offset validation request (#4261).
* Fix segmentation fault when subscribing to a non-existent topic and
calling `rd_kafka_message_leader_epoch()` on the polled `rkmessage` (#4245).
* Fix a segmentation fault when fetching from follower and the partition lease
expires while waiting for the result of a list offsets operation (#4254).


## Fixes

### Consumer fixes

* Duplicate messages can be emitted when a fetch response is received
in the middle of an offset validation request. Solved by discarding
the fetch if the state is not `ACTIVE`.
* When fetching from follower, if the partition lease expires after 5 minutes,
and a list offsets operation was requested to retrieve the earliest
or latest offset, it resulted in segmentation fault. This was fixed by
Expand Down Expand Up @@ -88,8 +92,11 @@ librdkafka v2.1.0 is a feature release:
interceptors might be called incorrectly (maybe multiple times) for not consumed messages.

### Consume API
* Segmentation fault when subscribing to a non-existent topic and
calling `rd_kafka_message_leader_epoch()` on the polled `rkmessage`.

* Duplicate messages can be emitted when a fetch response is received
in the middle of an offset validation request.
* Segmentation fault when subscribing to a non-existent topic and
calling `rd_kafka_message_leader_epoch()` on the polled `rkmessage`.



Expand Down
15 changes: 15 additions & 0 deletions src/rdkafka_fetcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,21 @@ rd_kafka_fetch_reply_handle_partition(rd_kafka_broker_t *rkb,
return RD_KAFKA_RESP_ERR_NO_ERROR;
}

/* Make sure toppar is in ACTIVE state. */
if (unlikely(rktp->rktp_fetch_state != RD_KAFKA_TOPPAR_FETCH_ACTIVE)) {
rd_kafka_toppar_unlock(rktp);
rd_rkb_dbg(rkb, MSG, "FETCH",
"%.*s [%" PRId32
"]: partition not in state ACTIVE: "
"discarding fetch response",
RD_KAFKAP_STR_PR(topic), hdr.Partition);
rd_kafka_toppar_destroy(rktp); /* from get */
rd_kafka_buf_skip(rkbuf, hdr.MessageSetSize);
if (aborted_txns)
rd_kafka_aborted_txns_destroy(aborted_txns);
return RD_KAFKA_RESP_ERR_NO_ERROR;
}

fetch_version = rktp->rktp_fetch_version;
rd_kafka_toppar_unlock(rktp);

Expand Down
146 changes: 146 additions & 0 deletions tests/0139-offset_validation_mock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* librdkafka - Apache Kafka C library
*
* Copyright (c) 2023, Confluent Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/

#include "test.h"

#include "../src/rdkafka_proto.h"


struct _produce_args {
const char *topic;
int sleep;
rd_kafka_conf_t *conf;
};

static int produce_concurrent_thread(void *args) {
rd_kafka_t *p1;
test_curr->exp_dr_err = RD_KAFKA_RESP_ERR_NO_ERROR;
test_curr->exp_dr_status = RD_KAFKA_MSG_STATUS_PERSISTED;

struct _produce_args *produce_args = args;
rd_sleep(produce_args->sleep);

p1 = test_create_handle(RD_KAFKA_PRODUCER, produce_args->conf);
TEST_CALL_ERR__(
rd_kafka_producev(p1, RD_KAFKA_V_TOPIC(produce_args->topic),
RD_KAFKA_V_VALUE("hi", 2), RD_KAFKA_V_END));
rd_kafka_flush(p1, -1);
rd_kafka_destroy(p1);
return 0;
}

/**
* @brief Send a produce request in the middle of an offset validation
* and expect that the fetched message is discarded, don't producing
* a duplicate when state becomes active again. See #4249.
*/
static void do_test_no_duplicates_during_offset_validation(void) {
const char *topic = test_mk_topic_name(__FUNCTION__, 1);
const char *c1_groupid = topic;
rd_kafka_t *c1;
rd_kafka_conf_t *conf, *conf_producer;
const char *bootstraps;
rd_kafka_mock_cluster_t *mcluster;
int initial_msg_count = 5;
thrd_t thrd;
struct _produce_args args = RD_ZERO_INIT;
uint64_t testid = test_id_generate();

SUB_TEST_QUICK();

mcluster = test_mock_cluster_new(1, &bootstraps);
rd_kafka_mock_topic_create(mcluster, topic, 1, 1);

/* Slow down OffsetForLeaderEpoch so a produce and
* subsequent fetch can happen while it's in-flight */
rd_kafka_mock_broker_push_request_error_rtts(
mcluster, 1, RD_KAFKAP_OffsetForLeaderEpoch, 1,
RD_KAFKA_RESP_ERR_NO_ERROR, 5000);

test_conf_init(&conf_producer, NULL, 60);
test_conf_set(conf_producer, "bootstrap.servers", bootstraps);


/* Seed the topic with messages */
test_produce_msgs_easy_v(topic, testid, 0, 0, initial_msg_count, 10,
"bootstrap.servers", bootstraps,
"batch.num.messages", "1", NULL);

args.topic = topic;
/* Makes that the message is produced while an offset validation
* is ongoing */
args.sleep = 5;
args.conf = conf_producer;
/* Spin up concurrent thread */
if (thrd_create(&thrd, produce_concurrent_thread, (void *)&args) !=
thrd_success)
TEST_FAIL("Failed to create thread");

test_conf_init(&conf, NULL, 60);

test_conf_set(conf, "bootstrap.servers", bootstraps);
/* Makes that an offset validation happens at the same
* time a new message is being produced */
test_conf_set(conf, "topic.metadata.refresh.interval.ms", "5000");
test_conf_set(conf, "auto.offset.reset", "earliest");
test_conf_set(conf, "enable.auto.commit", "false");
test_conf_set(conf, "enable.auto.offset.store", "false");
test_conf_set(conf, "enable.partition.eof", "true");

c1 = test_create_consumer(c1_groupid, NULL, conf, NULL);
test_consumer_subscribe(c1, topic);

/* Consume initial messages */
test_consumer_poll("MSG_INIT", c1, testid, 0, 0, initial_msg_count,
NULL);
/* EOF after initial messages */
test_consumer_poll("MSG_EOF", c1, testid, 1, initial_msg_count, 0,
NULL);
/* Concurrent producer message and EOF */
test_consumer_poll("MSG_AND_EOF", c1, testid, 1, initial_msg_count, 1,
NULL);
/* Only an EOF, not a duplicate message */
test_consumer_poll("MSG_EOF2", c1, testid, 1, initial_msg_count, 0,
NULL);

thrd_join(thrd, NULL);

rd_kafka_destroy(c1);

test_mock_cluster_destroy(mcluster);

TEST_LATER_CHECK();
SUB_TEST_PASS();
}

int main_0139_offset_validation_mock(int argc, char **argv) {

do_test_no_duplicates_during_offset_validation();

return 0;
}
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ set(
0136-resolve_cb.c
0137-barrier_batch_consume.c
0138-admin_mock.c
0139-offset_validation_mock.c
8000-idle.cpp
8001-fetch_from_follower_mock_manual.c
test.c
Expand Down
3 changes: 3 additions & 0 deletions tests/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ _TEST_DECL(0135_sasl_credentials);
_TEST_DECL(0136_resolve_cb);
_TEST_DECL(0137_barrier_batch_consume);
_TEST_DECL(0138_admin_mock);
_TEST_DECL(0139_offset_validation_mock);


/* Manual tests */
_TEST_DECL(8000_idle);
Expand Down Expand Up @@ -491,6 +493,7 @@ struct test tests[] = {
_TEST(0136_resolve_cb, TEST_F_LOCAL),
_TEST(0137_barrier_batch_consume, 0),
_TEST(0138_admin_mock, TEST_F_LOCAL, TEST_BRKVER(2, 4, 0, 0)),
_TEST(0139_offset_validation_mock, 0),

/* Manual tests */
_TEST(8000_idle, TEST_F_MANUAL),
Expand Down
1 change: 1 addition & 0 deletions win32/tests/tests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@
<ClCompile Include="..\..\tests\0136-resolve_cb.c" />
<ClCompile Include="..\..\tests\0137-barrier_batch_consume.c" />
<ClCompile Include="..\..\tests\0138-admin_mock.c" />
<ClCompile Include="..\..\tests\0139-offset_validation_mock.c" />
<ClCompile Include="..\..\tests\8000-idle.cpp" />
<ClCompile Include="..\..\tests\8001-fetch_from_follower_mock_manual.c" />
<ClCompile Include="..\..\tests\test.c" />
Expand Down