Skip to content

fix(transaction): Fix schedule queues #4925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion src/server/list_family_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include <absl/strings/match.h>

#include <random>

#include "base/gtest.h"
#include "base/logging.h"
#include "facade/facade_test.h"
Expand All @@ -16,6 +18,7 @@
#include "server/string_family.h"
#include "server/test_utils.h"
#include "server/transaction.h"
#include "util/fibers/fibers.h"

using namespace testing;
using namespace std;
Expand Down Expand Up @@ -1323,7 +1326,7 @@ TEST_F(ListFamilyTest, LMPopWrongType) {
EXPECT_THAT(resp, RespArray(ElementsAre("l1", RespArray(ElementsAre("e1")))));
}

// Reproduce a flow that triggered a wrong DCHECK in the transaction flow.
// Blocking command wakeup is complicated by running multi transaction at the same time
TEST_F(ListFamilyTest, AwakeMulti) {
auto f1 = pp_->at(1)->LaunchFiber(Launch::dispatch, [&] {
for (unsigned i = 0; i < 100; ++i) {
Expand Down Expand Up @@ -1352,6 +1355,33 @@ TEST_F(ListFamilyTest, AwakeMulti) {
f3.Join();
}

// Stress test: one fiber issues a long series of BLMOVE commands while a second fiber
// feeds the source list with LPUSH, pausing briefly after every push. The scenario is
// repeated for several pause lengths. Skipped unless NDEBUG is defined.
TEST_F(ListFamilyTest, PressureBLMove) {
#if !defined(NDEBUG)
  GTEST_SKIP() << "Requires release build to reproduce";
#endif

  // Number of commands each fiber sends per round.
  static constexpr unsigned kOpsPerFiber = 1000;

  // Sends kOpsPerFiber BLMOVE commands (LEFT -> LEFT, timeout argument "0") as `client`.
  auto drain_list = [this](string_view client, string_view from, string_view to) {
    for (unsigned done = 0; done != kOpsPerFiber; ++done) {
      Run(client, {"blmove", from, to, "LEFT", "LEFT", "0"});
    }
  };

  // Sends kOpsPerFiber LPUSH commands as `client`, sleeping `pause_usec` microseconds
  // after each one.
  auto feed_list = [this](string_view client, size_t pause_usec, string_view key) {
    const auto pause = pause_usec * 1us;
    for (unsigned done = 0; done != kOpsPerFiber; ++done) {
      Run(client, {"lpush", key, "a"});
      ThisFiber::SleepFor(pause);
    }
  };

  for (size_t delay : {1, 2, 5}) {
    LOG(INFO) << "Running with delay: " << delay;

    // Both fibers are launched via pp_->at(1); the consumer is started first.
    auto consumer_fb = pp_->at(1)->LaunchFiber([drain_list] { drain_list("c1", "src", "dest"); });
    auto producer_fb =
        pp_->at(1)->LaunchFiber([feed_list, delay] { feed_list("p1", delay, "src"); });

    consumer_fb.Join();
    producer_fb.Join();
  }
}

TEST_F(ListFamilyTest, AwakeDb1) {
const char* kDbId = "1";

Expand Down
8 changes: 2 additions & 6 deletions src/server/transaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -766,11 +766,7 @@ void Transaction::ScheduleInternal() {

ScheduleContext schedule_ctx{this, optimistic_exec};

// TODO: this optimization is disabled because issue #4648 revealed that this code can
// lead to a transaction not being scheduled.
// To reproduce the bug remove the false in the condition and run
// ./list_family_test --gtest_filter=*AwakeMulti on alpine machine
if (false && unique_shard_cnt_ == 1) {
if (unique_shard_cnt_ == 1) {
// Single shard optimization. Note: we could apply the same optimization
// to multi-shard transactions as well by creating a vector of ScheduleContext.
schedule_queues[unique_shard_id_].queue.Push(&schedule_ctx);
Expand Down Expand Up @@ -1221,7 +1217,7 @@ void Transaction::ScheduleBatchInShard() {
// We do this to avoid the situation where we have a data race, where
// a transaction is added to the queue, we've checked that sq.armed is true and skipped
// adding the callback that fetches the transaction.
sq.armed.store(false, memory_order_release);
sq.armed.exchange(false, memory_order_acq_rel);
}
}

Expand Down
Loading