diff --git a/src/clients/storage/InternalStorageClient.cpp b/src/clients/storage/InternalStorageClient.cpp index ad2a7c58960..67e635c318b 100644 --- a/src/clients/storage/InternalStorageClient.cpp +++ b/src/clients/storage/InternalStorageClient.cpp @@ -22,6 +22,8 @@ ::nebula::cpp2::ErrorCode getErrorCode(T& tryResp) { switch (stResp.status().code()) { case Status::Code::kLeaderChanged: return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; + case Status::Code::kError: + return nebula::cpp2::ErrorCode::E_RPC_FAILURE; default: LOG(ERROR) << "not impl error transform: code=" << static_cast(stResp.status().code()); @@ -69,8 +71,8 @@ void InternalStorageClient::chainUpdateEdge(cpp2::UpdateEdgeRequest& reversedReq std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); + VLOG(1) << "chainUpdateEdge rpc: " << apache::thrift::util::enumNameSafe(code); if (code == ::nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainUpdateEdge(reversedRequest, termOfSrc, optVersion, std::move(p)); } else { p.setValue(code); @@ -108,7 +110,6 @@ void InternalStorageClient::chainAddEdges(cpp2::AddEdgesRequest& directReq, std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); if (code == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainAddEdges(directReq, termId, optVersion, std::move(p)); } else { p.setValue(code); @@ -165,7 +166,6 @@ void InternalStorageClient::chainDeleteEdges(cpp2::DeleteEdgesRequest& req, std::move(resp).thenTry([=, p = std::move(p)](auto&& t) mutable { auto code = getErrorCode(t); if (code == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - std::this_thread::sleep_for(std::chrono::milliseconds(500)); chainDeleteEdges(req, txnId, termId, std::move(p)); } else { p.setValue(code); diff --git a/src/common/utils/MemoryLockWrapper.h b/src/common/utils/MemoryLockWrapper.h index 
cf0db2d807b..5378a6d0a0b 100644 --- a/src/common/utils/MemoryLockWrapper.h +++ b/src/common/utils/MemoryLockWrapper.h @@ -51,7 +51,7 @@ class MemoryLockGuard { } ~MemoryLockGuard() { - if (locked_) { + if (locked_ && autoUnlock_) { lock_->unlockBatch(keys_); } } @@ -71,15 +71,8 @@ class MemoryLockGuard { return *iter_; } - // this will manual set the lock to unlocked state - // which mean will not release all locks automatically - // please make sure you really know the side effect - void forceLock() { - locked_ = true; - } - - void forceUnlock() { - locked_ = false; + void setAutoUnlock(bool autoUnlock) { + autoUnlock_ = autoUnlock; } protected: @@ -87,6 +80,7 @@ class MemoryLockGuard { std::vector keys_; typename std::vector::iterator iter_; bool locked_{false}; + bool autoUnlock_{true}; }; } // namespace nebula diff --git a/src/interface/common.thrift b/src/interface/common.thrift index 84e70effdcc..695bb4e2f04 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -497,6 +497,7 @@ enum ErrorCode { E_RAFT_WRITE_BLOCKED = -3528, E_RAFT_BUFFER_OVERFLOW = -3529, E_RAFT_ATOMIC_OP_FAILED = -3530, + E_LEADER_LEASE_FAILED = -3531, E_UNKNOWN = -8000, } (cpp.enum_strict) diff --git a/src/kvstore/NebulaStore.cpp b/src/kvstore/NebulaStore.cpp index 43623e22ef0..18cab45c9ed 100644 --- a/src/kvstore/NebulaStore.cpp +++ b/src/kvstore/NebulaStore.cpp @@ -584,7 +584,8 @@ nebula::cpp2::ErrorCode NebulaStore::get(GraphSpaceID spaceId, } auto part = nebula::value(ret); if (!checkLeader(part, canReadFromFollower)) { - return nebula::cpp2::ErrorCode::E_LEADER_CHANGED; + return part->isLeader() ? 
nebula::cpp2::ErrorCode::E_LEADER_LEASE_FAILED + : nebula::cpp2::ErrorCode::E_LEADER_CHANGED; } return part->engine()->get(key, value); } diff --git a/src/meta/upgrade/v2/meta.thrift b/src/meta/upgrade/v2/meta.thrift index d46922478b8..8dbe888140f 100644 --- a/src/meta/upgrade/v2/meta.thrift +++ b/src/meta/upgrade/v2/meta.thrift @@ -26,7 +26,7 @@ struct SpaceDesc { 3: i32 replica_factor = 0, 4: binary charset_name, 5: binary collate_name, - 6: ColumnTypeDef vid_type = {"type": PropertyType.FIXED_STRING, "type_length": 8}, + 6: ColumnTypeDef vid_type = {"type": "PropertyType.FIXED_STRING", "type_length": 8}, 7: optional binary group_name, 8: optional IsolationLevel isolation_level, 9: optional binary comment, @@ -78,4 +78,4 @@ struct ColumnTypeDef { enum IsolationLevel { DEFAULT = 0x00, // allow add half edge(either in or out edge succeeded) TOSS = 0x01, // add in and out edge atomic -} (cpp.enum_strict) \ No newline at end of file +} (cpp.enum_strict) diff --git a/src/mock/MockCluster.cpp b/src/mock/MockCluster.cpp index 0eb5407fa6f..c9d8fbaae62 100644 --- a/src/mock/MockCluster.cpp +++ b/src/mock/MockCluster.cpp @@ -213,6 +213,7 @@ void MockCluster::initStorageKV(const char* dataPath, txnMan_ = std::make_unique(storageEnv_.get()); storageEnv_->txnMan_ = txnMan_.get(); + txnMan_->start(); } void MockCluster::startStorage(HostAddr addr, diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 6a5ade90293..b1227f4bcd8 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -75,14 +75,13 @@ nebula_add_library( transaction/ConsistUtil.cpp transaction/ChainUpdateEdgeLocalProcessor.cpp transaction/ChainUpdateEdgeRemoteProcessor.cpp - transaction/ChainResumeProcessor.cpp transaction/ChainAddEdgesGroupProcessor.cpp transaction/ChainAddEdgesLocalProcessor.cpp transaction/ChainAddEdgesRemoteProcessor.cpp - transaction/ResumeAddEdgeProcessor.cpp - transaction/ResumeAddEdgeRemoteProcessor.cpp - transaction/ResumeUpdateProcessor.cpp - 
transaction/ResumeUpdateRemoteProcessor.cpp + transaction/ChainResumeAddPrimeProcessor.cpp + transaction/ChainResumeAddDoublePrimeProcessor.cpp + transaction/ChainResumeUpdatePrimeProcessor.cpp + transaction/ChainResumeUpdateDoublePrimeProcessor.cpp transaction/ChainProcessorFactory.cpp transaction/ChainDeleteEdgesGroupProcessor.cpp transaction/ChainDeleteEdgesLocalProcessor.cpp diff --git a/src/storage/InternalStorageServiceHandler.h b/src/storage/InternalStorageServiceHandler.h index 01407c3b204..10bb052ed05 100644 --- a/src/storage/InternalStorageServiceHandler.h +++ b/src/storage/InternalStorageServiceHandler.h @@ -22,13 +22,14 @@ class InternalStorageServiceHandler final : public cpp2::InternalStorageServiceS public: explicit InternalStorageServiceHandler(StorageEnv* env); - folly::Future future_chainAddEdges(const cpp2::ChainAddEdgesRequest& p_req); + folly::Future future_chainAddEdges( + const cpp2::ChainAddEdgesRequest& p_req) override; folly::Future future_chainUpdateEdge( - const cpp2::ChainUpdateEdgeRequest& p_req); + const cpp2::ChainUpdateEdgeRequest& p_req) override; folly::Future future_chainDeleteEdges( - const cpp2::ChainDeleteEdgesRequest& p_req); + const cpp2::ChainDeleteEdgesRequest& p_req) override; private: StorageEnv* env_{nullptr}; diff --git a/src/storage/StorageServer.cpp b/src/storage/StorageServer.cpp index eec3e5fff4a..b12632b4111 100644 --- a/src/storage/StorageServer.cpp +++ b/src/storage/StorageServer.cpp @@ -397,6 +397,7 @@ void StorageServer::stop() { if (txnMan_) { txnMan_->stop(); + txnMan_->join(); } if (taskMgr_) { taskMgr_->shutdown(); diff --git a/src/storage/index/LookupProcessor.h b/src/storage/index/LookupProcessor.h index 00d6f8f55fa..05012f5f560 100644 --- a/src/storage/index/LookupProcessor.h +++ b/src/storage/index/LookupProcessor.h @@ -43,6 +43,9 @@ class LookupProcessor : public BaseProcessor { folly::Executor* executor_{nullptr}; std::unique_ptr planContext_; std::unique_ptr context_; + /** + * @brief the final 
output + */ nebula::DataSet resultDataSet_; nebula::DataSet statsDataSet_; std::vector partResults_; diff --git a/src/storage/kv/GetProcessor.h b/src/storage/kv/GetProcessor.h index 7caa28d237e..a4fa6907223 100644 --- a/src/storage/kv/GetProcessor.h +++ b/src/storage/kv/GetProcessor.h @@ -14,6 +14,9 @@ namespace storage { extern ProcessorCounters kGetCounters; +/** + * @brief this is a simple get() interface when storage run in KV mode. + */ class GetProcessor : public BaseProcessor { public: static GetProcessor* instance(StorageEnv* env, diff --git a/src/storage/kv/PutProcessor.h b/src/storage/kv/PutProcessor.h index 101cc183097..7888abd64dd 100644 --- a/src/storage/kv/PutProcessor.h +++ b/src/storage/kv/PutProcessor.h @@ -13,7 +13,9 @@ namespace nebula { namespace storage { extern ProcessorCounters kPutCounters; - +/** + * @brief this is a simple put() interface when storage run in KV mode. + */ class PutProcessor : public BaseProcessor { public: static PutProcessor* instance(StorageEnv* env, diff --git a/src/storage/kv/RemoveProcessor.h b/src/storage/kv/RemoveProcessor.h index 59bab864e87..11dfc5febe2 100644 --- a/src/storage/kv/RemoveProcessor.h +++ b/src/storage/kv/RemoveProcessor.h @@ -14,6 +14,9 @@ namespace storage { extern ProcessorCounters kRemoveCounters; +/** + * @brief this is a simple remove() interface when storage run in KV mode. 
+ */ class RemoveProcessor : public BaseProcessor { public: static RemoveProcessor* instance(StorageEnv* env, diff --git a/src/storage/test/CMakeLists.txt b/src/storage/test/CMakeLists.txt index 38d59a24f93..b7c255f55e5 100644 --- a/src/storage/test/CMakeLists.txt +++ b/src/storage/test/CMakeLists.txt @@ -754,20 +754,20 @@ nebula_add_executable( gtest ) -nebula_add_executable( - NAME - chain_resume_edge_test - SOURCES - ChainResumeEdgeTest.cpp - OBJECTS - ${storage_test_deps} - LIBRARIES - ${ROCKSDB_LIBRARIES} - ${THRIFT_LIBRARIES} - ${PROXYGEN_LIBRARIES} - wangle - gtest -) +# nebula_add_executable( +# NAME +# chain_resume_edge_test +# SOURCES +# ChainResumeEdgeTest.cpp +# OBJECTS +# ${storage_test_deps} +# LIBRARIES +# ${ROCKSDB_LIBRARIES} +# ${THRIFT_LIBRARIES} +# ${PROXYGEN_LIBRARIES} +# wangle +# gtest +# ) nebula_add_executable( NAME diff --git a/src/storage/test/ChainAddEdgesTest.cpp b/src/storage/test/ChainAddEdgesTest.cpp index 3881e0cc671..a8d0d7cbb26 100644 --- a/src/storage/test/ChainAddEdgesTest.cpp +++ b/src/storage/test/ChainAddEdgesTest.cpp @@ -27,6 +27,7 @@ namespace storage { constexpr int32_t mockSpaceId = 1; constexpr int32_t mockPartNum = 1; constexpr int32_t fackTerm = 1; +constexpr auto suc = nebula::cpp2::ErrorCode::SUCCEEDED; // make sure test class works well TEST(ChainAddEdgesTest, TestUtilsTest) { @@ -38,23 +39,23 @@ TEST(ChainAddEdgesTest, TestUtilsTest) { env->metaClient_ = mClient.get(); MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); - auto* processor = new FakeChainAddEdgesLocalProcessor(env); + auto* proc = new FakeChainAddEdgesLocalProcessor(env); - processor->rcPrepareLocal = nebula::cpp2::ErrorCode::SUCCEEDED; - processor->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; - processor->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + proc->setPrepareCode(suc); + proc->setRemoteCode(suc); + proc->setCommitCode(suc); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest 
req = mock::MockData::mockAddEdgesReq(false, 1); LOG(INFO) << "Test AddEdgesProcessor..."; - auto fut = processor->getFuture(); - processor->process(req); + auto fut = proc->getFuture(); + proc->process(req); auto resp = std::move(fut).get(); - EXPECT_EQ(0, resp.result.failed_parts.size()); LOG(INFO) << "Check data in kv store..."; // The number of data in serve is 334 + EXPECT_EQ(0, resp.result.failed_parts.size()); checkAddEdgesData(req, env, 0, 0); } @@ -68,7 +69,7 @@ TEST(ChainAddEdgesTest, prepareLocalSucceedTest) { MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + proc->setRemoteCode(nebula::cpp2::ErrorCode::E_RPC_FAILURE); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -127,7 +128,7 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_OUTDATED_TERM; + proc->setRemoteCode(nebula::cpp2::ErrorCode::E_OUTDATED_TERM); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -136,7 +137,7 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); - EXPECT_EQ(1, resp.result.failed_parts.size()); + EXPECT_EQ(0, resp.result.failed_parts.size()); ChainTestUtils util; // none of really edge key should be inserted @@ -144,8 +145,6 @@ TEST(ChainAddEdgesTest, processRemoteFailedTest) { // prime key should be deleted EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - // env->txnMan_->stop(); } TEST(ChainAddEdgesTest, processRemoteUnknownTest) { @@ -159,7 +158,7 
@@ TEST(ChainAddEdgesTest, processRemoteUnknownTest) { auto* proc = new FakeChainAddEdgesLocalProcessor(env); - proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + proc->setRemoteCode(nebula::cpp2::ErrorCode::E_RPC_FAILURE); LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -168,39 +167,22 @@ TEST(ChainAddEdgesTest, processRemoteUnknownTest) { auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); - EXPECT_EQ(0, resp.result.failed_parts.size()); ChainTestUtils util; // none of really edge key should be inserted + EXPECT_EQ(0, resp.result.failed_parts.size()); EXPECT_EQ(334, numOfKey(req, util.genKey, env)); // prime key should be deleted EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); } -// make a reversed request, make sure it can be added successfully -TEST(ChainAddEdgesTest, processRemoteTest) { - fs::TempDir rootPath("/tmp/AddEdgesTest.XXXXXX"); - mock::MockCluster cluster; - cluster.initStorageKV(rootPath.path()); - auto* env = cluster.storageEnv_.get(); - auto mClient = MetaClientTestUpdater::makeDefault(); - - env->metaClient_ = mClient.get(); - MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); - - auto* proc = new FakeChainAddEdgesLocalProcessor(env); - LOG(INFO) << "Build AddEdgesRequest..."; - cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); - - auto reversedRequest = proc->reverseRequestForward(req); - delete proc; -} - } // namespace storage } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); diff --git a/src/storage/test/ChainDeleteEdgesTest.cpp b/src/storage/test/ChainDeleteEdgesTest.cpp index 932c895210d..ee4316d380d 100644 --- a/src/storage/test/ChainDeleteEdgesTest.cpp 
+++ b/src/storage/test/ChainDeleteEdgesTest.cpp @@ -222,6 +222,9 @@ TEST(ChainDeleteEdgesTest, DISABLED_Test5) { delProc->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; delProc->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -231,16 +234,13 @@ TEST(ChainDeleteEdgesTest, DISABLED_Test5) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 167); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - // std::this_thread::sleep_for(std::chrono::milliseconds()); + for (PartitionID i = 1; i <= partNum; ++i) { + env->txnMan_->scanPrimes(mockSpaceId, i, 1); + } + env->txnMan_->stop(); + env->txnMan_->join(); num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - - delete iClient; } // add some edges, then delete all of them, not execute local commit @@ -277,6 +277,9 @@ TEST(ChainDeleteEdgesTest, Test6) { delProc->rcProcessRemote = nebula::cpp2::ErrorCode::SUCCEEDED; delProc->rcProcessLocal = nebula::cpp2::ErrorCode::SUCCEEDED; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -286,16 +289,18 @@ TEST(ChainDeleteEdgesTest, Test6) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 167); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - 
FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + for (PartitionID i = 1; i <= partNum; ++i) { + env->txnMan_->scanPrimes(mockSpaceId, i); + } + // ChainResumeProcessor resumeProc(env); + // resumeProc.process(); + std::this_thread::sleep_for(std::chrono::seconds(2)); + sleep(1); + env->txnMan_->stop(); + env->txnMan_->join(); + num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } // add some edges, delete one of them, rpc failure @@ -332,6 +337,9 @@ TEST(ChainDeleteEdgesTest, Test7) { auto delReq = delProc->makeDelRequest(addReq, limit); delProc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Run DeleteEdgesReq..."; auto futDel = delProc->getFuture(); delProc->process(delReq); @@ -341,20 +349,16 @@ TEST(ChainDeleteEdgesTest, Test7) { LOG(INFO) << "after del(), edge num = " << num; EXPECT_EQ(num, 166); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); + LOG(INFO) << "after recover()"; + num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 166); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } -// add some edges, then one all of them, rpc failure +// add some edges, delete all, rpc failure TEST(ChainDeleteEdgesTest, Test8) { fs::TempDir rootPath("/tmp/DeleteEdgesTest.XXXXXX"); mock::MockCluster cluster; @@ -397,16 +401,18 @@ TEST(ChainDeleteEdgesTest, Test8) { LOG(INFO) << "after 
del(), edge num = " << num; EXPECT_EQ(num, 0); - env->txnMan_->scanAll(); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + // for (PartitionID i = 1; i <= partNum; ++i) { + // env->txnMan_->scanPrimes(mockSpaceId, i); + // } + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + // ChainResumeProcessor resumeProc(env); + // resumeProc.process(); + + env->txnMan_->stop(); + env->txnMan_->join(); num = util.checkNumOfKey(env, mockSpaceId, edgeKeys); EXPECT_EQ(num, 0); - std::this_thread::sleep_for(std::chrono::milliseconds(300)); - - delete iClient; } } // namespace storage @@ -414,6 +420,7 @@ TEST(ChainDeleteEdgesTest, Test8) { int main(int argc, char** argv) { FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); diff --git a/src/storage/test/ChainResumeEdgeTest.cpp b/src/storage/test/ChainResumeEdgeTest.cpp index 9c985a8462d..716b8263672 100644 --- a/src/storage/test/ChainResumeEdgeTest.cpp +++ b/src/storage/test/ChainResumeEdgeTest.cpp @@ -20,7 +20,6 @@ #include "storage/test/TestUtils.h" #include "storage/transaction/ChainAddEdgesGroupProcessor.h" #include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainResumeProcessor.h" #include "storage/transaction/ConsistUtil.h" namespace nebula { @@ -58,6 +57,9 @@ TEST(ChainResumeEdgesTest, resumeTest1) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, mockPartNum); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + auto fut = proc->getFuture(); proc->process(req); auto resp = 
std::move(fut).get(); @@ -70,16 +72,12 @@ TEST(ChainResumeEdgesTest, resumeTest1) { env->txnMan_->scanPrimes(1, i); } - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genDoublePrime, env)); - - delete iClient; } /** @@ -107,6 +105,9 @@ TEST(ChainResumeEdgesTest, resumeTest2) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, mockPartNum); + UPCLT iClient(FakeInternalStorageClient::instance(env, Code::E_UNKNOWN)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); @@ -118,16 +119,16 @@ TEST(ChainResumeEdgesTest, resumeTest2) { EXPECT_EQ(334, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - auto* iClient = FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::E_UNKNOWN); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + for (int32_t i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } + + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(0, numOfKey(req, util.genKey, env)); EXPECT_EQ(334, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -161,22 +162,21 @@ TEST(ChainResumeEdgesTest, resumeTest3) { EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, 
iClient); + + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); for (auto i = 1; i <= mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); // none of really edge key should be inserted EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -204,28 +204,26 @@ TEST(ChainResumeEdgesTest, resumeTest4) { int partNum = 1; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, partNum); + auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); auto resp = std::move(fut).get(); EXPECT_EQ(0, resp.result.failed_parts.size()); - // ChainTestUtils util; EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(334, numOfKey(req, gTestUtil.genDoublePrime, env)); - auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, gTestUtil.genKey, env)); EXPECT_EQ(0, numOfKey(req, gTestUtil.genPrime, env)); EXPECT_EQ(334, numOfKey(req, gTestUtil.genDoublePrime, env)); - - delete iClient; } /** @@ -243,6 +241,10 @@ TEST(ChainResumeEdgesTest, resumeTest5) { proc->rcProcessRemote = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT 
iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); @@ -252,22 +254,17 @@ TEST(ChainResumeEdgesTest, resumeTest5) { auto resp = std::move(fut).get(); EXPECT_EQ(0, resp.result.failed_parts.size()); + env->txnMan_->stop(); + env->txnMan_->join(); + ChainTestUtils util; EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; - auto* iClient = FakeInternalStorageClient::instance(env, error); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - EXPECT_EQ(334, numOfKey(req, util.genKey, env)); EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } /** @@ -288,6 +285,9 @@ TEST(ChainResumeEdgesTest, resumeTest6) { LOG(INFO) << "Build AddEdgesRequest..."; cpp2::AddEdgesRequest req = mock::MockData::mockAddEdgesReq(false, 1); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Test AddEdgesProcessor..."; auto fut = proc->getFuture(); proc->process(req); @@ -299,21 +299,16 @@ TEST(ChainResumeEdgesTest, resumeTest6) { EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(334, numOfKey(req, util.genDoublePrime, env)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - for (auto i = 1; i <= mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_EQ(334, numOfKey(req, util.genKey, env)); 
EXPECT_EQ(0, numOfKey(req, util.genPrime, env)); EXPECT_EQ(0, numOfKey(req, util.genDoublePrime, env)); - - delete iClient; } // resume an update left prime, check resume succeeded @@ -342,27 +337,24 @@ TEST(ChainUpdateEdgeTest, resumeTest7) { LOG(INFO) << "addUnfinishedEdge()"; proc->wrapAddUnfinishedEdge(ResumeType::RESUME_CHAIN); auto resp = std::move(f).get(); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); EXPECT_FALSE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - for (auto i = 1; i <= mockPartNum; ++i) { env->txnMan_->scanPrimes(1, i); } - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume failed @@ -389,23 +381,25 @@ TEST(ChainUpdateEdgeTest, resumeTest8) { proc->process(req); auto resp = std::move(f).get(); - // EXPECT_TRUE(helper.checkResp(req, resp)); + auto error = nebula::cpp2::ErrorCode::E_UNKNOWN; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + + for (auto i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } + + env->txnMan_->stop(); + env->txnMan_->join(); + EXPECT_FALSE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_UNKNOWN); - 
FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); - EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_TRUE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume outdated @@ -433,23 +427,20 @@ TEST(ChainUpdateEdgeTest, resumeTest9) { proc->wrapAddUnfinishedEdge(ResumeType::RESUME_CHAIN); auto resp = std::move(f).get(); - // EXPECT_TRUE(helper.checkResp(req, resp)); - EXPECT_FALSE(helper.checkRequestUpdated(env, req)); - EXPECT_TRUE(helper.edgeExist(env, req)); - EXPECT_TRUE(helper.primeExist(env, req)); - EXPECT_FALSE(helper.doublePrimeExist(env, req)); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + + for (auto i = 1; i <= mockPartNum; ++i) { + env->txnMan_->scanPrimes(1, i); + } - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_RPC_FAILURE); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, check resume succeeded @@ -461,18 +452,19 @@ TEST(ChainUpdateEdgeTest, resumeTest10) { auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); - // auto parts = cluster.getTotalParts(); auto parts = mockPartNum; EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); LOG(INFO) << "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + UPCLT iClient(FakeInternalStorageClient::instance(env)); + FakeInternalStorageClient::hookInternalStorageClient(env, 
iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -481,16 +473,12 @@ TEST(ChainUpdateEdgeTest, resumeTest10) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume failed @@ -509,11 +497,14 @@ TEST(ChainUpdateEdgeTest, resumeTest11) { LOG(INFO) << "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -522,17 +513,12 @@ TEST(ChainUpdateEdgeTest, resumeTest11) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_UNKNOWN); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); 
EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } // resume an update left prime, resume outdated @@ -551,11 +537,14 @@ TEST(ChainUpdateEdgeTest, resumeTest12) { LOG(INFO) << "Test UpdateEdgeRequest..."; auto req = helper.makeDefaultRequest(); + auto error = nebula::cpp2::ErrorCode::E_RPC_FAILURE; + UPCLT iClient(FakeInternalStorageClient::instance(env, error)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + LOG(INFO) << "Fake Prime..."; auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); proc->rcProcessRemote = Code::E_RPC_FAILURE; - // proc->rcProcessLocal = Code::SUCCEEDED; proc->process(req); auto resp = std::move(f).get(); @@ -564,22 +553,19 @@ TEST(ChainUpdateEdgeTest, resumeTest12) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - auto* iClient = FakeInternalStorageClient::instance(env); - iClient->setErrorCode(Code::E_RPC_FAILURE); - FakeInternalStorageClient::hookInternalStorageClient(env, iClient); - ChainResumeProcessor resumeProc(env); - resumeProc.process(); + env->txnMan_->stop(); + env->txnMan_->join(); EXPECT_TRUE(helper.edgeExist(env, req)); EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_TRUE(helper.doublePrimeExist(env, req)); - - delete iClient; } } // namespace storage } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); diff --git a/src/storage/test/ChainTestUtils.h b/src/storage/test/ChainTestUtils.h index 0fd04ca00ee..da819605d60 100644 --- a/src/storage/test/ChainTestUtils.h +++ b/src/storage/test/ChainTestUtils.h @@ -6,7 +6,7 @@ #pragma once #include "storage/CommonUtils.h" -#include "storage/transaction/ChainResumeProcessor.h" +#include "storage/transaction/ChainAddEdgesLocalProcessor.h" #include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" #include 
"storage/transaction/ChainUpdateEdgeRemoteProcessor.h" @@ -19,6 +19,26 @@ extern const int32_t mockSpaceVidLen; using KeyGenerator = std::function; +struct TransactionManagerTester { + explicit TransactionManagerTester(TransactionManager* p) : man_(p) {} + + void stop() { + man_->stop(); + int32_t numCheckIdle = 0; + while (numCheckIdle < 3) { + auto stats = man_->exec_->getPoolStats(); + if (stats.threadCount == stats.idleThreadCount) { + ++numCheckIdle; + } else { + numCheckIdle = 0; + } + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + } + } + + TransactionManager* man_{nullptr}; +}; + class ChainTestUtils { public: ChainTestUtils() { @@ -125,6 +145,8 @@ class FakeChainAddEdgesLocalProcessor : public ChainAddEdgesLocalProcessor { public: explicit FakeChainAddEdgesLocalProcessor(StorageEnv* env) : ChainAddEdgesLocalProcessor(env) { spaceVidLen_ = 32; + rcRemote_ = Code::SUCCEEDED; + rcCommit_ = Code::SUCCEEDED; } folly::SemiFuture prepareLocal() override { @@ -168,6 +190,28 @@ class FakeChainAddEdgesLocalProcessor : public ChainAddEdgesLocalProcessor { folly::Optional rcProcessRemote; folly::Optional rcProcessLocal; + + void setPrepareCode(Code code, Code rc = Code::SUCCEEDED) { + rcPrepareLocal = code; + rcPrepare_ = rc; + } + + void setRemoteCode(Code code) { + rcProcessRemote = code; + rcRemote_ = code; + } + + void setCommitCode(Code code, Code rc = Code::SUCCEEDED) { + rcProcessLocal = code; + rcCommit_ = rc; + } + + void finish() { + auto rc = (rcPrepare_ == Code::SUCCEEDED) ? 
rcCommit_ : rcPrepare_; + pushResultCode(rc, localPartId_); + finished_.setValue(rc); + onFinished(); + } }; class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { @@ -187,7 +231,7 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } folly::SemiFuture processRemote(Code code) override { - LOG(INFO) << "FakeChainUpdateEdgeProcessorA::" << __func__ << "()"; + LOG(INFO) << "FakeChainUpdateEdgeProcessor::" << __func__ << "()"; if (rcProcessRemote) { LOG(INFO) << "processRemote() fake return " << apache::thrift::util::enumNameSafe(*rcProcessRemote); @@ -199,7 +243,7 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } folly::SemiFuture processLocal(Code code) override { - LOG(INFO) << "FakeChainUpdateEdgeProcessorA::" << __func__ << "()"; + LOG(INFO) << "FakeChainUpdateEdgeProcessor::" << __func__ << "()"; if (rcProcessLocal) { LOG(INFO) << "processLocal() fake return " << apache::thrift::util::enumNameSafe(*rcProcessLocal); @@ -210,13 +254,61 @@ class FakeChainUpdateProcessor : public ChainUpdateEdgeLocalProcessor { } void wrapAddUnfinishedEdge(ResumeType type) { - addUnfinishedEdge(type); + reportFailed(type); + } + void setPrepareCode(Code code, Code rc = Code::SUCCEEDED) { + rcPrepareLocal = code; + rcPrepare_ = rc; + } + + void setRemoteCode(Code code) { + rcProcessRemote = code; + rcRemote_ = code; + } + + void setCommitCode(Code code, Code rc = Code::SUCCEEDED) { + rcProcessLocal = code; + rcCommit_ = rc; + } + + void setDoRecover(bool doRecover) { + doRecover_ = doRecover; + } + + void finish() override { + if (doRecover_) { + LOG(INFO) << "do real finish()"; + ChainUpdateEdgeLocalProcessor::finish(); + } else { + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + + pushResultCode(rc, 
localPartId_); + finished_.setValue(rc); + onFinished(); + } } public: folly::Optional rcPrepareLocal; folly::Optional rcProcessRemote; folly::Optional rcProcessLocal; + bool doRecover_{false}; }; class MetaClientTestUpdater { @@ -255,17 +347,20 @@ class MetaClientTestUpdater { meta::MetaClientOptions options; auto mClient = std::make_unique(exec, addrs, options); - mClient->localCache_[mockSpaceId] = std::make_shared(); + auto spSpaceInfoCache = std::make_shared(); + addLocalCache(*mClient, mockSpaceId, spSpaceInfoCache); + auto* pCache = getLocalCache(mClient.get(), mockSpaceId); + for (int i = 0; i != mockPartNum; ++i) { - mClient->localCache_[mockSpaceId]->termOfPartition_[i] = i; - auto ignoreItem = mClient->localCache_[mockSpaceId]->partsAlloc_[i]; + pCache->termOfPartition_[i] = i; + auto ignoreItem = pCache->partsAlloc_[i]; UNUSED(ignoreItem); } meta::cpp2::ColumnTypeDef type; type.type_ref() = nebula::cpp2::PropertyType::FIXED_STRING; type.type_length_ref() = 32; - mClient->localCache_[mockSpaceId]->spaceDesc_.vid_type_ref() = std::move(type); + pCache->spaceDesc_.vid_type_ref() = std::move(type); mClient->ready_ = true; return mClient; } @@ -328,12 +423,10 @@ class FakeInternalStorageClient : public InternalStorageClient { static FakeInternalStorageClient* instance(StorageEnv* env, Code fakeCode = Code::SUCCEEDED) { auto pool = std::make_shared(3); return new FakeInternalStorageClient(env, pool, fakeCode); - // static FakeInternalStorageClient client(env, pool, fakeCode); - // return &client; } static void hookInternalStorageClient(StorageEnv* env, InternalStorageClient* client) { - env->txnMan_->iClient_ = client; + env->interClient_ = client; } private: @@ -341,6 +434,8 @@ class FakeInternalStorageClient : public InternalStorageClient { Code code_{Code::SUCCEEDED}; }; +using UPCLT = std::unique_ptr; + struct ChainUpdateEdgeTestHelper { ChainUpdateEdgeTestHelper() { sEdgeType = std::to_string(std::abs(edgeType_)); @@ -425,8 +520,8 @@ struct 
ChainUpdateEdgeTestHelper { return req; } - bool checkResp2(cpp2::UpdateResponse& resp) { - LOG(INFO) << "checkResp2(cpp2::UpdateResponse& resp)"; + bool checkResp(cpp2::UpdateResponse& resp) { + LOG(INFO) << "checkResp(cpp2::UpdateResponse& resp)"; if (!resp.props_ref()) { LOG(INFO) << "!resp.props_ref()"; return false; @@ -506,7 +601,6 @@ struct ChainUpdateEdgeTestHelper { auto val1 = cexpr->value(); auto val2 = edgeReader->getValueByName(prop.get_name()); - // EXPECT_EQ(val1, val2); if (val1 != val2) { ret = false; } @@ -524,25 +618,5 @@ struct ChainUpdateEdgeTestHelper { std::string sEdgeType; }; -// class ChainResumeProcessorTestHelper { -// public: -// explicit ChainResumeProcessorTestHelper(ChainResumeProcessor* proc) : proc_(proc) {} - -// void setAddEdgeProc(ChainAddEdgesLocalProcessor* proc) { -// proc_->addProc = proc; -// } - -// // setUpdProc -// void setUpdProc(ChainUpdateEdgeLocalProcessor* proc) { -// proc_->updProc = proc; -// } - -// std::string getTxnId() { -// return proc_->addProc->txnId_; -// } -// public: -// ChainResumeProcessor* proc_{nullptr}; -// }; - } // namespace storage } // namespace nebula diff --git a/src/storage/test/ChainUpdateEdgeTest.cpp b/src/storage/test/ChainUpdateEdgeTest.cpp index 6249dac0bdf..ec8e219f883 100644 --- a/src/storage/test/ChainUpdateEdgeTest.cpp +++ b/src/storage/test/ChainUpdateEdgeTest.cpp @@ -21,22 +21,26 @@ #include "storage/test/TestUtils.h" #include "storage/transaction/ChainAddEdgesGroupProcessor.h" #include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainResumeProcessor.h" #include "storage/transaction/ChainUpdateEdgeRemoteProcessor.h" #include "storage/transaction/ConsistUtil.h" namespace nebula { namespace storage { -// using Code = ::nebula::cpp2::ErrorCode; - constexpr int32_t mockSpaceId = 1; constexpr int32_t mockPartNum = 6; +constexpr int32_t fackTerm = 1; constexpr int32_t mockSpaceVidLen = 32; ChainTestUtils gTestUtil; - ChainUpdateEdgeTestHelper 
helper; + +/** + * @brief do a normal update will succeed + * 1. prepare environment + * 2. do a normal update (without any error) + * 3. check edge request updated + */ TEST(ChainUpdateEdgeTest, updateTest1) { fs::TempDir rootPath("/tmp/UpdateEdgeTest.XXXXXX"); mock::MockCluster cluster; @@ -44,23 +48,28 @@ TEST(ChainUpdateEdgeTest, updateTest1) { auto* env = cluster.storageEnv_.get(); auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); + MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); + auto stPartsNum = env->metaClient_->partsNum(mockSpaceId); + if (stPartsNum.ok()) { + LOG(INFO) << "stPartsNum.value()=" << stPartsNum.value(); + } auto parts = cluster.getTotalParts(); + LOG(INFO) << "parts: " << parts; EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); LOG(INFO) << "Test updateTest1..."; auto req = helper.makeDefaultRequest(); - env->txnMan_->iClient_ = FakeInternalStorageClient::instance(env); + env->interClient_ = FakeInternalStorageClient::instance(env); auto reversedRequest = helper.reverseRequest(env, req); auto* proc = new FakeChainUpdateProcessor(env); - LOG(INFO) << "proc: " << proc; auto f = proc->getFuture(); proc->process(req); auto resp = std::move(f).get(); - EXPECT_TRUE(helper.checkResp2(resp)); + EXPECT_TRUE(helper.checkResp(resp)); EXPECT_TRUE(helper.checkRequestUpdated(env, req)); EXPECT_TRUE(helper.checkRequestUpdated(env, reversedRequest)); EXPECT_TRUE(helper.edgeExist(env, req)); @@ -68,6 +77,16 @@ TEST(ChainUpdateEdgeTest, updateTest1) { EXPECT_FALSE(helper.primeExist(env, req)); EXPECT_FALSE(helper.doublePrimeExist(env, req)); } +/** + * @brief updateTest2 (update non-exist edge will fail) + * 1. prepare environment + * 2. do a failed update + * 3. 
check result + * 3.1 edge not updated + * 3.2 prime not exist + * 3.3 double prime not exist + */ + TEST(ChainUpdateEdgeTest, updateTest2) { fs::TempDir rootPath("/tmp/UpdateEdgeTest.XXXXXX"); mock::MockCluster cluster; @@ -75,6 +94,7 @@ TEST(ChainUpdateEdgeTest, updateTest2) { auto* env = cluster.storageEnv_.get(); auto mClient = MetaClientTestUpdater::makeDefault(); env->metaClient_ = mClient.get(); + MetaClientTestUpdater::addPartTerm(env->metaClient_, mockSpaceId, mockPartNum, fackTerm); auto parts = cluster.getTotalParts(); EXPECT_TRUE(QueryTestUtils::mockEdgeData(env, parts, mockSpaceVidLen)); @@ -89,12 +109,12 @@ TEST(ChainUpdateEdgeTest, updateTest2) { auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = Code::E_KEY_NOT_FOUND; + proc->setRemoteCode(Code::E_KEY_NOT_FOUND); proc->process(badRequest); auto resp = std::move(f).get(); EXPECT_EQ(1, (*resp.result_ref()).failed_parts.size()); - EXPECT_FALSE(helper.checkResp2(resp)); + EXPECT_FALSE(helper.checkResp(resp)); EXPECT_FALSE(helper.edgeExist(env, badRequest)); EXPECT_FALSE(helper.primeExist(env, badRequest)); EXPECT_FALSE(helper.doublePrimeExist(env, badRequest)); @@ -119,8 +139,9 @@ TEST(ChainUpdateEdgeTest, updateTest3) { auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = Code::SUCCEEDED; - proc->rcProcessLocal = Code::SUCCEEDED; + proc->setRemoteCode(Code::SUCCEEDED); + proc->setCommitCode(Code::SUCCEEDED); + proc->process(goodRequest); auto resp = std::move(f).get(); @@ -146,9 +167,12 @@ TEST(ChainUpdateEdgeTest, updateTest4) { EXPECT_FALSE(helper.primeExist(env, goodRequest)); EXPECT_FALSE(helper.doublePrimeExist(env, goodRequest)); + UPCLT iClient(FakeInternalStorageClient::instance(env, nebula::cpp2::ErrorCode::SUCCEEDED)); + FakeInternalStorageClient::hookInternalStorageClient(env, iClient.get()); + auto* proc = new FakeChainUpdateProcessor(env); auto f = proc->getFuture(); - proc->rcProcessRemote = 
Code::E_RPC_FAILURE; + proc->setRemoteCode(Code::E_RPC_FAILURE); proc->process(goodRequest); auto resp = std::move(f).get(); @@ -161,6 +185,8 @@ TEST(ChainUpdateEdgeTest, updateTest4) { } // namespace nebula int main(int argc, char** argv) { + FLAGS_trace_toss = true; + FLAGS_v = 1; testing::InitGoogleTest(&argc, argv); folly::init(&argc, &argv, false); google::SetStderrLogging(google::INFO); @@ -168,30 +194,6 @@ int main(int argc, char** argv) { } // ***** Test Plan ***** -/** - * @brief updateTest1 (update a normal edge will succeed) - * previous update - * prepareLocal succeed succeed - * processRemote succeed succeed - * processLocal succeed succeed - * expect: edge true - * edge prime false - * double prime false - * prop changed true - */ - -/** - * @brief updateTest2 (update non-exist edge will fail) - * previous update - * prepareLocal failed succeed - * processRemote skip succeed - * processLocal failed succeed - * expect: edge false - * edge prime false - * double prime false - * prop changed true - */ - /** * @brief updateTest3 (remote update failed will not change anything) * previous update diff --git a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp index 1e58236c370..de0043e3b85 100644 --- a/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp +++ b/src/storage/transaction/ChainAddEdgesLocalProcessor.cpp @@ -22,61 +22,42 @@ void ChainAddEdgesLocalProcessor::process(const cpp2::AddEdgesRequest& req) { finish(); return; } + + uuid_ = ConsistUtil::strUUID(); + execDesc_ = ", AddEdges, "; env_->txnMan_->addChainTask(this); } -/** - * @brief - * 1. check term - * 2. set mem lock - * 3. 
write edge prime(key = edge prime, val = ) - */ folly::SemiFuture ChainAddEdgesLocalProcessor::prepareLocal() { - if (FLAGS_trace_toss) { - uuid_ = ConsistUtil::strUUID(); - readableEdgeDesc_ = makeReadableEdge(req_); - if (!readableEdgeDesc_.empty()) { - uuid_.append(" ").append(readableEdgeDesc_); - } + VLOG(2) << uuid_ << __func__ << "()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (rcPrepare_ != Code::SUCCEEDED) { + finish(); + return rcPrepare_; } if (!lockEdges(req_)) { + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; return Code::E_WRITE_WRITE_CONFLICT; } + replaceNullWithDefaultValue(req_); auto [pro, fut] = folly::makePromiseContract(); auto primes = makePrime(); - std::vector debugPrimes; - if (FLAGS_trace_toss) { - debugPrimes = primes; - } erasePrime(); env_->kvstore_->asyncMultiPut( - spaceId_, - localPartId_, - std::move(primes), - [p = std::move(pro), debugPrimes, this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - primeInserted_ = true; - if (FLAGS_trace_toss) { - for (auto& kv : debugPrimes) { - VLOG(1) << uuid_ << " put prime " << folly::hexlify(kv.first); - } - } - } else { - LOG(WARNING) << uuid_ << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } - + spaceId_, localPartId_, std::move(primes), [p = std::move(pro), this](auto rc) mutable { + rcPrepare_ = rc; p.setValue(rc); }); return std::move(fut); } folly::SemiFuture ChainAddEdgesLocalProcessor::processRemote(Code code) { - VLOG(1) << uuid_ << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED) { - return code; + VLOG(2) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } CHECK_EQ(req_.get_parts().size(), 1); auto reversedRequest = reverseRequest(req_); @@ -86,52 +67,39 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::processRemote(Code code) { return std::move(fut); } 
-folly::SemiFuture ChainAddEdgesLocalProcessor::processLocal(Code code) { - if (FLAGS_trace_toss) { - VLOG(1) << uuid_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); +folly::SemiFuture ChainAddEdgesLocalProcessor::processLocal(Code) { + VLOG(2) << uuid_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(rcRemote_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - bool remoteFailed{true}; - - if (code == Code::SUCCEEDED) { - // do nothing - remoteFailed = false; - } else if (code == Code::E_RPC_FAILURE) { - code_ = Code::SUCCEEDED; - remoteFailed = false; - } else { - code_ = code; - } - - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { kvAppend_ = makeDoublePrime(); - addUnfinishedEdge(ResumeType::RESUME_REMOTE); } - if (code_ == Code::SUCCEEDED) { - return forwardToDelegateProcessor(); - } else { - if (primeInserted_ && remoteFailed) { - return abort(); - } + if (rcRemote_ != Code::SUCCEEDED && rcRemote_ != Code::E_RPC_FAILURE) { + // prepare succeed and remote failed + return abort(); } - return code_; + return commit(); } -void ChainAddEdgesLocalProcessor::addUnfinishedEdge(ResumeType type) { +void ChainAddEdgesLocalProcessor::reportFailed(ResumeType type) { if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } + execDesc_ += ", reportFailed"; auto keys = toStrKeys(req_); for (auto& key : keys) { - env_->txnMan_->addPrime(spaceId_, key, type); + VLOG(1) << uuid_ << " term=" << term_ << ", reportFailed(), " << folly::hexlify(key); + env_->txnMan_->addPrime(spaceId_, localPartId_, term_, key, type); } } @@ -139,34 +107,11 
@@ bool ChainAddEdgesLocalProcessor::prepareRequest(const cpp2::AddEdgesRequest& re CHECK_EQ(req.get_parts().size(), 1); req_ = req; spaceId_ = req_.get_space_id(); - auto vidType = env_->metaClient_->getSpaceVidType(spaceId_); - if (!vidType.ok()) { - LOG(WARNING) << "can't get vidType"; - return false; - } else { - spaceVidType_ = vidType.value(); - } localPartId_ = req.get_parts().begin()->first; - replaceNullWithDefaultValue(req_); - - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { - LOG(INFO) << "get term failed"; - return false; - } - - auto vidLen = env_->schemaMan_->getSpaceVidLen(spaceId_); - if (!vidLen.ok()) { - LOG(ERROR) << "getSpaceVidLen failed, spaceId_: " << spaceId_ - << ", status: " << vidLen.status(); - setErrorCode(Code::E_INVALID_SPACEVIDLEN); - return false; - } - spaceVidLen_ = vidLen.value(); - return true; + return getSpaceVidLen(spaceId_) == Code::SUCCEEDED; } -folly::SemiFuture ChainAddEdgesLocalProcessor::forwardToDelegateProcessor() { +folly::SemiFuture ChainAddEdgesLocalProcessor::commit() { auto* proc = AddEdgesProcessor::instance(env_, nullptr); proc->consistOp_ = [&](kvstore::BatchHolder& a, std::vector* b) { callbackOfChainOp(a, b); @@ -174,26 +119,15 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::forwardToDelegateProcessor( auto futProc = proc->getFuture(); auto [pro, fut] = folly::makePromiseContract(); std::move(futProc).thenTry([&, p = std::move(pro)](auto&& t) mutable { - auto rc = Code::SUCCEEDED; + execDesc_ += ", commit(), "; if (t.hasException()) { LOG(INFO) << "catch ex: " << t.exception().what(); - rc = Code::E_UNKNOWN; + rcCommit_ = Code::E_UNKNOWN; } else { auto& resp = t.value(); - rc = extractRpcError(resp); - if (rc == Code::SUCCEEDED) { - if (FLAGS_trace_toss) { - for (auto& k : kvErased_) { - VLOG(1) << uuid_ << " erase prime " << folly::hexlify(k); - } - } - } else { - VLOG(1) << uuid_ << " forwardToDelegateProcessor(), code = " - << 
apache::thrift::util::enumNameSafe(rc); - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } + rcCommit_ = extractRpcError(resp); } - p.setValue(rc); + p.setValue(rcCommit_); }); proc->process(req_); return std::move(fut); @@ -215,22 +149,22 @@ void ChainAddEdgesLocalProcessor::doRpc(folly::Promise&& promise, promise.setValue(Code::E_LEADER_CHANGED); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto f = p.getFuture(); iClient->chainAddEdges(req, term_, edgeVer_, std::move(p)); std::move(f).thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - switch (code) { + rcRemote_ = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), std::move(req), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }); } @@ -260,23 +194,14 @@ folly::SemiFuture ChainAddEdgesLocalProcessor::abort() { } auto [pro, fut] = folly::makePromiseContract(); - env_->kvstore_->asyncMultiRemove( - req_.get_space_id(), - localPartId_, - std::move(kvErased_), - [p = std::move(pro), debugErased, this](auto rc) mutable { - VLOG(1) << uuid_ << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - if (FLAGS_trace_toss) { - for (auto& k : debugErased) { - VLOG(1) << uuid_ << "erase prime " << folly::hexlify(k); - } - } - } else { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - p.setValue(rc); - }); + env_->kvstore_->asyncMultiRemove(req_.get_space_id(), + localPartId_, + std::move(kvErased_), + [p = std::move(pro), debugErased, this](auto rc) mutable { + execDesc_ += ", abort(), "; + this->rcCommit_ = rc; + p.setValue(rc); + }); return std::move(fut); } @@ -322,8 +247,8 @@ void ChainAddEdgesLocalProcessor::erasePrime() { bool ChainAddEdgesLocalProcessor::lockEdges(const cpp2::AddEdgesRequest& req) { auto 
partId = req.get_parts().begin()->first; - auto* lockCore = env_->txnMan_->getLockCore(req.get_space_id(), partId); - if (!lockCore) { + lkCore_ = env_->txnMan_->getLockCore(req.get_space_id(), partId, term_); + if (!lkCore_) { return false; } @@ -331,7 +256,7 @@ bool ChainAddEdgesLocalProcessor::lockEdges(const cpp2::AddEdgesRequest& req) { for (auto& edge : req.get_parts().begin()->second) { keys.emplace_back(ConsistUtil::edgeKey(spaceVidLen_, partId, edge.get_key())); } - lk_ = std::make_unique(lockCore, keys); + lk_ = std::make_unique(lkCore_.get(), keys); return lk_->isLocked(); } @@ -363,9 +288,31 @@ cpp2::AddEdgesRequest ChainAddEdgesLocalProcessor::reverseRequest( } void ChainAddEdgesLocalProcessor::finish() { - VLOG(1) << uuid_ << " commitLocal(), code_ = " << apache::thrift::util::enumNameSafe(code_); - pushResultCode(code_, localPartId_); - finished_.setValue(code_); + auto rc = (rcPrepare_ == Code::SUCCEEDED) ? rcCommit_ : rcPrepare_; + if (rcPrepare_ == Code::SUCCEEDED) { + VLOG(1) << uuid_ << execDesc_ << makeReadableEdge(req_) + << ", rcPrepare_=" << apache::thrift::util::enumNameSafe(rcPrepare_) + << ", rcRemote_=" << apache::thrift::util::enumNameSafe(rcRemote_) + << ", rcCommit_=" << apache::thrift::util::enumNameSafe(rcCommit_); + } + do { + if (rcPrepare_ != Code::SUCCEEDED) { + break; // nothing written, no need to recover. 
+ } + + if (rcCommit_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + pushResultCode(rc, localPartId_); + finished_.setValue(rc); onFinished(); } @@ -383,36 +330,15 @@ cpp2::AddEdgesRequest ChainAddEdgesLocalProcessor::makeSingleEdgeRequest( return req; } -int64_t ChainAddEdgesLocalProcessor::toInt(const ::nebula::Value& val) { - if (spaceVidType_ == nebula::cpp2::PropertyType::FIXED_STRING) { - auto str = val.toString(); - if (str.size() < 3) { - return 0; - } - auto str2 = str.substr(1, str.size() - 2); - return atoll(str2.c_str()); - } else if (spaceVidType_ == nebula::cpp2::PropertyType::INT64) { - return *reinterpret_cast(const_cast(val.toString().c_str() + 1)); - } - return 0; -} - std::string ChainAddEdgesLocalProcessor::makeReadableEdge(const cpp2::AddEdgesRequest& req) { - if (req.get_parts().size() != 1) { - LOG(INFO) << req.get_parts().size(); - return ""; - } - if (req.get_parts().begin()->second.size() != 1) { - LOG(INFO) << req.get_parts().begin()->second.size(); - return ""; + std::stringstream oss; + oss << "term=" << term_ << ", "; + auto rawKeyVec = toStrKeys(req); + for (auto& rawKey : rawKeyVec) { + oss << ConsistUtil::readableKey(spaceVidLen_, isIntId_, rawKey) << ", "; } - auto& edge = req.get_parts().begin()->second.back(); - int64_t isrc = toInt(edge.get_key().get_src()); - int64_t idst = toInt(edge.get_key().get_dst()); - - std::stringstream oss; - oss << isrc << "->" << idst << ", val: "; + auto& edge = req.get_parts().begin()->second.back(); for (auto& val : edge.get_props()) { oss << val.toString() << " "; } diff --git a/src/storage/transaction/ChainAddEdgesLocalProcessor.h b/src/storage/transaction/ChainAddEdgesLocalProcessor.h index 06695e29677..d9f397dc91d 100644 --- a/src/storage/transaction/ChainAddEdgesLocalProcessor.h +++ b/src/storage/transaction/ChainAddEdgesLocalProcessor.h @@ 
-97,13 +97,18 @@ class ChainAddEdgesLocalProcessor : public BaseProcessor, void eraseDoublePrime(); - folly::SemiFuture forwardToDelegateProcessor(); + /** + * @brief will call normal AddEdgesProcessor to do real insert. + * + * @return folly::SemiFuture + */ + folly::SemiFuture commit(); /// if any operation failed or can not determined(RPC error) /// call this to leave a record in transaction manager /// the record can be scanned by the background resume thread /// then will do fail over logic - void addUnfinishedEdge(ResumeType type); + void reportFailed(ResumeType type); /*** consider the following case: * @@ -119,36 +124,42 @@ * */ void replaceNullWithDefaultValue(cpp2::AddEdgesRequest& req); - std::string makeReadableEdge(const cpp2::AddEdgesRequest& req); + /** + * @brief check if an error code belongs to kv store + * we can do retry / recover if we meet a kv store error + * but if we meet a logical error (retry will always fail) + * we should return error directly. 
+ * @param code + * @return true + * @return false + */ + bool isKVStoreError(Code code); - int64_t toInt(const ::nebula::Value& val); + std::string makeReadableEdge(const cpp2::AddEdgesRequest& req); protected: GraphSpaceID spaceId_; PartitionID localPartId_; PartitionID remotePartId_; cpp2::AddEdgesRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_{nullptr}; int retryLimit_{10}; - // term at prepareLocal, not allowed to change during execution - TermID term_{-1}; - - // set to true when prime insert succeed - // in processLocal(), we check this to determine if need to do abort() - bool primeInserted_{false}; std::vector kvErased_; std::vector kvAppend_; folly::Optional edgeVer_{folly::none}; - int64_t resumedEdgeVer_{-1}; - // for debug / trace purpose + // for trace purpose std::string uuid_; + // as we print all description in finish(), + // we can log execution clue in this + std::string execDesc_; + // for debug, edge "100"->"101" will print like 2231303022->2231303122 // which is hard to recognize. 
Transform to human readable format std::string readableEdgeDesc_; - nebula::cpp2::PropertyType spaceVidType_{nebula::cpp2::PropertyType::UNKNOWN}; }; } // namespace storage diff --git a/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp b/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp index 94dfce48417..01f648ff143 100644 --- a/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp +++ b/src/storage/transaction/ChainAddEdgesRemoteProcessor.cpp @@ -42,7 +42,7 @@ void ChainAddEdgesRemoteProcessor::process(const cpp2::ChainAddEdgesRequest& req // need to do this after set spaceVidLen_ auto keys = getStrEdgeKeys(req); for (auto& key : keys) { - LOG(INFO) << uuid_ << ", key = " << folly::hexlify(key); + VLOG(2) << uuid_ << ", key = " << folly::hexlify(key); } } commit(req); @@ -61,7 +61,7 @@ void ChainAddEdgesRemoteProcessor::commit(const cpp2::ChainAddEdgesRequest& req) rc = part.code; handleErrorCode(part.code, spaceId, part.get_part_id()); } - VLOG(1) << uuid_ << " " << apache::thrift::util::enumNameSafe(rc); + VLOG(2) << uuid_ << " " << apache::thrift::util::enumNameSafe(rc); this->result_ = resp.get_result(); this->onFinished(); }); diff --git a/src/storage/transaction/ChainBaseProcessor.h b/src/storage/transaction/ChainBaseProcessor.h index c20f7dc6e84..e9f2e3e128c 100644 --- a/src/storage/transaction/ChainBaseProcessor.h +++ b/src/storage/transaction/ChainBaseProcessor.h @@ -20,6 +20,8 @@ using Code = ::nebula::cpp2::ErrorCode; * */ class ChainBaseProcessor { + friend class ChainProcessorFactory; + public: virtual ~ChainBaseProcessor() = default; @@ -42,14 +44,10 @@ class ChainBaseProcessor { virtual void finish() = 0; protected: - void setErrorCode(Code code) { - if (code_ == Code::SUCCEEDED) { - code_ = code; - } - } - - protected: - Code code_ = Code::SUCCEEDED; + Code rcPrepare_ = Code::SUCCEEDED; + Code rcRemote_ = Code::E_UNKNOWN; + Code rcCommit_ = Code::E_UNKNOWN; + TermID term_; folly::Promise finished_; }; diff --git 
a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp index f9c9e1951b8..3acfb6be462 100644 --- a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.cpp @@ -29,10 +29,10 @@ void ChainDeleteEdgesLocalProcessor::process(const cpp2::DeleteEdgesRequest& req folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { txnId_ = ConsistUtil::strUUID(); - VLOG(1) << txnId_ << " prepareLocal(): " << DeleteEdgesRequestHelper::explain(req_); if (!lockEdges(req_)) { - return Code::E_WRITE_WRITE_CONFLICT; + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; + return rcPrepare_; } primes_ = makePrime(req_); @@ -42,12 +42,7 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { auto [pro, fut] = folly::makePromiseContract(); env_->kvstore_->asyncMultiPut( spaceId_, localPartId_, std::move(primes), [p = std::move(pro), this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - setPrime_ = true; - } else { - LOG(WARNING) << txnId_ << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } - + rcPrepare_ = rc; p.setValue(rc); }); return std::move(fut); @@ -55,8 +50,8 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::prepareLocal() { folly::SemiFuture ChainDeleteEdgesLocalProcessor::processRemote(Code code) { VLOG(1) << txnId_ << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED) { - return code; + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } DCHECK_EQ(req_.get_parts().size(), 1); auto reversedRequest = reverseRequest(req_); @@ -68,53 +63,45 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::processRemote(Code code) folly::SemiFuture ChainDeleteEdgesLocalProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); - - bool remoteFailed{false}; - if (code == 
Code::SUCCEEDED) { - // do nothing - } else if (code == Code::E_RPC_FAILURE) { - code_ = Code::SUCCEEDED; - } else { - code_ = code; - remoteFailed = true; + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - auto [currTerm, suc] = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto [currTerm, suc] = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm != term_) { LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { + auto keyPrefix = ConsistUtil::doublePrimeTable(localPartId_); + setDoublePrime_ = true; for (auto& kv : primes_) { - auto key = - ConsistUtil::doublePrimeTable().append(kv.first.substr(ConsistUtil::primeTable().size())); - setDoublePrime_ = true; + auto key = keyPrefix + kv.first.substr(sizeof(PartitionID)); doublePrimes_.emplace_back(key, kv.second); } - reportFailed(ResumeType::RESUME_REMOTE); } - if (code_ == Code::SUCCEEDED) { + if (rcRemote_ == Code::SUCCEEDED || rcRemote_ == Code::E_RPC_FAILURE) { return commitLocal(); } else { - if (setPrime_ && remoteFailed) { - return abort(); - } + return abort(); } - return code_; + // actually, should return either commit() or abort() + return rcRemote_; } void ChainDeleteEdgesLocalProcessor::reportFailed(ResumeType type) { if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } for (auto& edgesOfPart : req_.get_parts()) { auto partId = edgesOfPart.first; for (auto& key : edgesOfPart.second) { auto strKey = ConsistUtil::edgeKey(spaceVidLen_, partId, key); - env_->txnMan_->addPrime(spaceId_, strKey, type); + env_->txnMan_->addPrime(spaceId_, localPartId_, term_, strKey, type); } } } @@ -142,7 +129,7 @@ std::vector ChainDeleteEdgesLocalProcessor::makePrime( val += ConsistUtil::deleteIdentifier(); auto partId = singleReq.get_parts().begin()->first; auto& edgeKey = 
singleReq.get_parts().begin()->second.back(); - auto key = ConsistUtil::primeTable(); + auto key = ConsistUtil::primeTable(partId); key += ConsistUtil::edgeKey(spaceVidLen_, partId, edgeKey); ret.emplace_back(std::make_pair(key, val)); } @@ -154,15 +141,13 @@ Code ChainDeleteEdgesLocalProcessor::checkRequest(const cpp2::DeleteEdgesRequest req_ = req; DCHECK(!req_.get_parts().empty()); spaceId_ = req_.get_space_id(); + localPartId_ = req.get_parts().begin()->first; - auto vidType = env_->metaClient_->getSpaceVidType(spaceId_); - if (!vidType.ok()) { - LOG(WARNING) << "can't get vidType, spaceId_ = " << spaceId_; - return Code::E_SPACE_NOT_FOUND; - } else { - spaceVidType_ = vidType.value(); + auto rc = getSpaceVidLen(spaceId_); + if (rc != Code::SUCCEEDED) { + return rc; } - localPartId_ = req.get_parts().begin()->first; + auto part = env_->kvstore_->part(spaceId_, localPartId_); if (!nebula::ok(part)) { pushResultCode(nebula::error(part), localPartId_); @@ -180,13 +165,6 @@ Code ChainDeleteEdgesLocalProcessor::checkRequest(const cpp2::DeleteEdgesRequest term_ = (nebula::value(part))->termId(); - auto vidLen = env_->schemaMan_->getSpaceVidLen(spaceId_); - if (!vidLen.ok()) { - LOG(ERROR) << "getSpaceVidLen failed, spaceId_: " << spaceId_ - << ", status: " << vidLen.status(); - return Code::E_INVALID_SPACEVIDLEN; - } - spaceVidLen_ = vidLen.value(); return Code::SUCCEEDED; } @@ -199,12 +177,7 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::commitLocal() { auto [pro, fut] = folly::makePromiseContract(); std::move(futProc).thenValue([&, p = std::move(pro)](auto&& resp) mutable { auto rc = ConsistUtil::getErrorCode(resp); - VLOG(1) << txnId_ << " commitLocal() " << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - // do nothing - } else { - reportFailed(ResumeType::RESUME_CHAIN); - } + rcCommit_ = rc; p.setValue(rc); }); proc->process(req_); @@ -218,22 +191,22 @@ void ChainDeleteEdgesLocalProcessor::doRpc(folly::Promise&& promise, 
promise.setValue(Code::E_LEADER_CHANGED); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto f = p.getFuture(); iClient->chainDeleteEdges(req, txnId_, term_, std::move(p)); std::move(f).thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - switch (code) { + rcRemote_ = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), std::move(req), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }); } @@ -305,19 +278,15 @@ folly::SemiFuture ChainDeleteEdgesLocalProcessor::abort() { std::move(keyRemoved), [p = std::move(pro), this](auto rc) mutable { VLOG(1) << txnId_ << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc == Code::SUCCEEDED) { - // do nothing - } else { - reportFailed(ResumeType::RESUME_CHAIN); - } + rcCommit_ = rc; p.setValue(rc); }); return std::move(fut); } bool ChainDeleteEdgesLocalProcessor::lockEdges(const cpp2::DeleteEdgesRequest& req) { - auto* lockCore = env_->txnMan_->getLockCore(req.get_space_id(), localPartId_); - if (!lockCore) { + lkCore_ = env_->txnMan_->getLockCore(req.get_space_id(), localPartId_, term_); + if (!lkCore_) { VLOG(1) << txnId_ << "get lock failed."; return false; } @@ -328,9 +297,10 @@ bool ChainDeleteEdgesLocalProcessor::lockEdges(const cpp2::DeleteEdgesRequest& r keys.emplace_back(std::move(eKey)); } bool dedup = true; - lk_ = std::make_unique(lockCore, keys, dedup); + lk_ = std::make_unique(lkCore_.get(), keys, dedup); if (!lk_->isLocked()) { - VLOG(1) << txnId_ << " conflict " << ConsistUtil::readableKey(spaceVidLen_, lk_->conflictKey()); + VLOG(1) << txnId_ << "term=" << term_ << ", conflict key = " + << ConsistUtil::readableKey(spaceVidLen_, isIntId_, lk_->conflictKey()); } return lk_->isLocked(); } @@ -350,9 +320,33 @@ cpp2::DeleteEdgesRequest 
ChainDeleteEdgesLocalProcessor::reverseRequest( } void ChainDeleteEdgesLocalProcessor::finish() { - VLOG(1) << txnId_ << " commitLocal(), code_ = " << apache::thrift::util::enumNameSafe(code_); - pushResultCode(code_, localPartId_); - finished_.setValue(code_); + VLOG(1) << txnId_ << " commitLocal() = " << apache::thrift::util::enumNameSafe(rcCommit_); + TermID currTerm = 0; + std::tie(currTerm, std::ignore) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + do { + if (term_ != currTerm) { + // transaction manager will do the clean. + break; + } + + if (rcPrepare_ != Code::SUCCEEDED) { + break; // nothing written, no need to recover. + } + + if (rcCommit_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + auto rc = (rcPrepare_ == Code::SUCCEEDED) ? rcCommit_ : rcPrepare_; + pushResultCode(rc, localPartId_); + finished_.setValue(rc); onFinished(); } diff --git a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h index 2c4f467b3d3..5a356b36e0d 100644 --- a/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h +++ b/src/storage/transaction/ChainDeleteEdgesLocalProcessor.h @@ -86,6 +86,7 @@ class ChainDeleteEdgesLocalProcessor : public BaseProcessor, PartitionID localPartId_; PartitionID remotePartId_; cpp2::DeleteEdgesRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_{nullptr}; int retryLimit_{10}; /** @@ -107,8 +108,6 @@ class ChainDeleteEdgesLocalProcessor : public BaseProcessor, std::string txnId_; - ::nebula::cpp2::PropertyType spaceVidType_{::nebula::cpp2::PropertyType::UNKNOWN}; - // for debug, edge "100"->"101" will print like 2231303022->2231303122 // which is hard to recognize. 
Transform to human readable format std::string readableEdgeDesc_; diff --git a/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp index 19698798cff..3b3da57339d 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesResumeProcessor.cpp @@ -17,15 +17,14 @@ ChainDeleteEdgesResumeProcessor::ChainDeleteEdgesResumeProcessor(StorageEnv* env const std::string& val) : ChainDeleteEdgesLocalProcessor(env) { req_ = DeleteEdgesRequestHelper::parseDeleteEdgesRequest(val); - - VLOG(1) << "explain req_: " << DeleteEdgesRequestHelper::explain(req_); } folly::SemiFuture ChainDeleteEdgesResumeProcessor::prepareLocal() { - code_ = checkRequest(req_); + rcPrepare_ = checkRequest(req_); primes_ = makePrime(req_); setPrime_ = true; - return code_; + rcPrepare_ = Code::SUCCEEDED; + return rcPrepare_; } folly::SemiFuture ChainDeleteEdgesResumeProcessor::processRemote(Code code) { @@ -35,24 +34,25 @@ folly::SemiFuture ChainDeleteEdgesResumeProcessor::processRemote(Code code folly::SemiFuture ChainDeleteEdgesResumeProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - setErrorCode(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } if (code == Code::E_RPC_FAILURE) { for (auto& kv : primes_) { - auto key = - ConsistUtil::doublePrimeTable().append(kv.first.substr(ConsistUtil::primeTable().size())); + auto key = ConsistUtil::doublePrimeTable(localPartId_) + .append(kv.first.substr(ConsistUtil::primeTable(localPartId_).size())); doublePrimes_.emplace_back(key, kv.second); } } - if (code == Code::E_RPC_FAILURE || code == Code::SUCCEEDED) { + if (rcRemote_ == Code::E_RPC_FAILURE || rcRemote_ == Code::SUCCEEDED) { // if there are something wrong other than rpc failure // we need to keep the resume retry(by not remove those prime key) - code_ = commitLocal().get(); - return 
code_; + return commitLocal(); } - return code_; + return rcRemote_; } } // namespace storage diff --git a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp index a0e0cdbc84f..cbb970a21fc 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp +++ b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.cpp @@ -17,48 +17,59 @@ ChainDeleteEdgesResumeRemoteProcessor::ChainDeleteEdgesResumeRemoteProcessor(Sto } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::prepareLocal() { - code_ = checkRequest(req_); - return code_; + rcPrepare_ = checkRequest(req_); + return rcPrepare_; } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::processRemote(Code code) { VLOG(1) << txnId_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); - return ChainDeleteEdgesLocalProcessor::processRemote(code); } folly::SemiFuture ChainDeleteEdgesResumeRemoteProcessor::processLocal(Code code) { VLOG(1) << txnId_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - - setErrorCode(code); - - if (code == Code::E_RPC_FAILURE) { - return code_; + if (code != Code::SUCCEEDED) { + return code; } - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove double prime key) - std::vector doublePrimeKeys; - for (auto& partOfKeys : req_.get_parts()) { - std::string key; - for (auto& edgeKey : partOfKeys.second) { - doublePrimeKeys.emplace_back(); - doublePrimeKeys.back() = ConsistUtil::doublePrimeTable().append( - ConsistUtil::edgeKey(spaceVidLen_, localPartId_, edgeKey)); - } + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove double prime key) + std::vector doublePrimeKeys; + for (auto& partOfKeys : req_.get_parts()) { + std::string key; + for (auto& edgeKey : partOfKeys.second) { + 
doublePrimeKeys.emplace_back(); + doublePrimeKeys.back() = + ConsistUtil::doublePrimeTable(localPartId_) + .append(ConsistUtil::edgeKey(spaceVidLen_, localPartId_, edgeKey)); } - - folly::Baton baton; - env_->kvstore_->asyncMultiRemove( - spaceId_, localPartId_, std::move(doublePrimeKeys), [this, &baton](auto&& rc) { - this->code_ = rc; - baton.post(); - }); - baton.wait(); } - return code_; + auto [pro, fut] = folly::makePromiseContract(); + env_->kvstore_->asyncMultiRemove(spaceId_, + localPartId_, + std::move(doublePrimeKeys), + [this, p = std::move(pro)](auto&& rc) mutable { + rcCommit_ = rc; + p.setValue(rc); + }); + return std::move(fut); +} + +void ChainDeleteEdgesResumeRemoteProcessor::finish() { + VLOG(1) << " commitLocal() = " << apache::thrift::util::enumNameSafe(rcCommit_); + TermID currTerm = 0; + std::tie(currTerm, std::ignore) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (term_ == currTerm) { + if (rcCommit_ != Code::SUCCEEDED || rcRemote_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_REMOTE); + } + } else { + // transaction manager will do the clean. 
+ } + pushResultCode(rcCommit_, localPartId_); + finished_.setValue(rcCommit_); + onFinished(); } } // namespace storage diff --git a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h index 31c091f5962..3ae19e2dd5c 100644 --- a/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h +++ b/src/storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h @@ -16,13 +16,15 @@ class ChainDeleteEdgesResumeRemoteProcessor : public ChainDeleteEdgesLocalProces return new ChainDeleteEdgesResumeRemoteProcessor(env, val); } + virtual ~ChainDeleteEdgesResumeRemoteProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ChainDeleteEdgesResumeRemoteProcessor() = default; + void finish() override; protected: ChainDeleteEdgesResumeRemoteProcessor(StorageEnv* env, const std::string& val); diff --git a/src/storage/transaction/ChainProcessorFactory.cpp b/src/storage/transaction/ChainProcessorFactory.cpp index b8a60e60e4d..2f25402c683 100644 --- a/src/storage/transaction/ChainProcessorFactory.cpp +++ b/src/storage/transaction/ChainProcessorFactory.cpp @@ -7,16 +7,43 @@ #include "storage/transaction/ChainDeleteEdgesResumeProcessor.h" #include "storage/transaction/ChainDeleteEdgesResumeRemoteProcessor.h" +#include "storage/transaction/ChainResumeAddDoublePrimeProcessor.h" +#include "storage/transaction/ChainResumeAddPrimeProcessor.h" +#include "storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h" +#include "storage/transaction/ChainResumeUpdatePrimeProcessor.h" #include "storage/transaction/ConsistUtil.h" -#include "storage/transaction/ResumeAddEdgeProcessor.h" -#include "storage/transaction/ResumeAddEdgeRemoteProcessor.h" -#include "storage/transaction/ResumeUpdateProcessor.h" -#include 
"storage/transaction/ResumeUpdateRemoteProcessor.h" namespace nebula { namespace storage { +ChainBaseProcessor* ChainProcessorFactory::make(StorageEnv* env, + GraphSpaceID spaceId, + TermID termId, + const std::string& edgeKey, + ResumeType type) { + auto partId = NebulaKeyUtils::getPart(edgeKey); + auto prefix = (type == ResumeType::RESUME_CHAIN) ? ConsistUtil::primeTable(partId) + : ConsistUtil::doublePrimeTable(partId); + auto key = prefix + edgeKey; + std::string val; + auto rc = Code::SUCCEEDED; + do { + rc = env->kvstore_->get(spaceId, partId, key, &val); + } while (rc == Code::E_LEADER_LEASE_FAILED); + + if (rc != Code::SUCCEEDED) { + VLOG(1) << "resume edge space=" << spaceId << ", part=" << partId + << ", hex = " << folly::hexlify(edgeKey) + << ", rc = " << apache::thrift::util::enumNameSafe(rc); + return nullptr; + } + + ResumeOptions opt(type, val); + return makeProcessor(env, termId, opt); +} + ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, + TermID termId, const ResumeOptions& options) { ChainBaseProcessor* ret = nullptr; auto requestType = ConsistUtil::parseType(options.primeValue); @@ -24,11 +51,13 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::INSERT: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { - ret = ResumeAddEdgeProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeAddPrimeProcessor"; + ret = ChainResumeAddPrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { - ret = ResumeAddEdgeRemoteProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeAddDoublePrimeProcessor"; + ret = ChainResumeAddDoublePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::UNKNOWN: { @@ -40,11 +69,13 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::UPDATE: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { - ret = 
ResumeUpdateProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeUpdatePrimeProcessor"; + ret = ChainResumeUpdatePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { - ret = ResumeUpdateRemoteProcessor::instance(env, options.primeValue); + VLOG(2) << "make ChainResumeUpdateDoublePrimeProcessor"; + ret = ChainResumeUpdateDoublePrimeProcessor::instance(env, options.primeValue); break; } case ResumeType::UNKNOWN: { @@ -56,10 +87,12 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, case RequestType::DELETE: { switch (options.resumeType) { case ResumeType::RESUME_CHAIN: { + VLOG(1) << "make ChainDeleteEdgesResumeProcessor"; ret = ChainDeleteEdgesResumeProcessor::instance(env, options.primeValue); break; } case ResumeType::RESUME_REMOTE: { + VLOG(2) << "make ChainDeleteEdgesResumeRemoteProcessor"; ret = ChainDeleteEdgesResumeRemoteProcessor::instance(env, options.primeValue); break; } @@ -73,6 +106,7 @@ ChainBaseProcessor* ChainProcessorFactory::makeProcessor(StorageEnv* env, LOG(FATAL) << "RequestType::UNKNOWN: not supposed run here"; } } + ret->term_ = termId; return ret; } diff --git a/src/storage/transaction/ChainProcessorFactory.h b/src/storage/transaction/ChainProcessorFactory.h index 6c1518199d1..05a062c0fda 100644 --- a/src/storage/transaction/ChainProcessorFactory.h +++ b/src/storage/transaction/ChainProcessorFactory.h @@ -14,7 +14,15 @@ namespace storage { class ChainProcessorFactory { public: - static ChainBaseProcessor* makeProcessor(StorageEnv* env, const ResumeOptions& options); + static ChainBaseProcessor* makeProcessor(StorageEnv* env, + TermID termId, + const ResumeOptions& options); + + static ChainBaseProcessor* make(StorageEnv* env, + GraphSpaceID spaceId, + TermID termId, + const std::string& edgeKey, + ResumeType type); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp 
b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp new file mode 100644 index 00000000000..e23c69293b1 --- /dev/null +++ b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.cpp @@ -0,0 +1,78 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "storage/transaction/ChainResumeAddDoublePrimeProcessor.h" + +namespace nebula { +namespace storage { + +ChainResumeAddDoublePrimeProcessor::ChainResumeAddDoublePrimeProcessor(StorageEnv* env, + const std::string& val) + : ChainAddEdgesLocalProcessor(env) { + req_ = ConsistUtil::parseAddRequest(val); + + uuid_ = ConsistUtil::strUUID() + " ResumeDoublePrime, "; +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::prepareLocal() { + ChainAddEdgesLocalProcessor::prepareRequest(req_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + + auto spaceId = req_.get_space_id(); + auto numOfPart = env_->metaClient_->partsNum(spaceId); + if (!numOfPart.ok()) { + rcPrepare_ = Code::E_SPACE_NOT_FOUND; + return Code::E_SPACE_NOT_FOUND; + } + auto& parts = req_.get_parts(); + auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); + remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); + + return Code::SUCCEEDED; +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::processRemote(Code code) { + return ChainAddEdgesLocalProcessor::processRemote(code); +} + +folly::SemiFuture ChainResumeAddDoublePrimeProcessor::processLocal(Code code) { + VLOG(2) << uuid_ << " commitLocal() = " << apache::thrift::util::enumNameSafe(code); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; + } + + if (code == Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + eraseDoublePrime(); + 
return abort(); + } + + return code; +} + +void ChainResumeAddDoublePrimeProcessor::finish() { + if (rcPrepare_ == Code::SUCCEEDED) { + VLOG(1) << uuid_ << ", " << makeReadableEdge(req_) + << ", rcPrepare_ = " << apache::thrift::util::enumNameSafe(rcPrepare_) + << ", rcRemote_ = " << apache::thrift::util::enumNameSafe(rcRemote_) + << ", rcCommit_ = " << apache::thrift::util::enumNameSafe(rcCommit_); + } + if (rcCommit_ != Code::SUCCEEDED || rcRemote_ != Code::SUCCEEDED) { + reportFailed(ResumeType::RESUME_REMOTE); + } else { + // nothing todo + } + pushResultCode(rcCommit_, localPartId_); + finished_.setValue(rcCommit_); + onFinished(); +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeProcessor.h b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h similarity index 59% rename from src/storage/transaction/ResumeAddEdgeProcessor.h rename to src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h index 797bf7979aa..4eb918e2c99 100644 --- a/src/storage/transaction/ResumeAddEdgeProcessor.h +++ b/src/storage/transaction/ChainResumeAddDoublePrimeProcessor.h @@ -10,22 +10,24 @@ namespace nebula { namespace storage { -class ResumeAddEdgeProcessor : public ChainAddEdgesLocalProcessor { +class ChainResumeAddDoublePrimeProcessor : public ChainAddEdgesLocalProcessor { public: - static ResumeAddEdgeProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeAddEdgeProcessor(env, val); + static ChainResumeAddDoublePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeAddDoublePrimeProcessor(env, val); } + virtual ~ChainResumeAddDoublePrimeProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ResumeAddEdgeProcessor() = default; + void finish() override; protected: - 
ResumeAddEdgeProcessor(StorageEnv* env, const std::string& val); + ChainResumeAddDoublePrimeProcessor(StorageEnv* env, const std::string& val); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp b/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp new file mode 100644 index 00000000000..17675b6e307 --- /dev/null +++ b/src/storage/transaction/ChainResumeAddPrimeProcessor.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. + */ + +#include "storage/transaction/ChainResumeAddPrimeProcessor.h" + +namespace nebula { +namespace storage { + +ChainResumeAddPrimeProcessor::ChainResumeAddPrimeProcessor(StorageEnv* env, const std::string& val) + : ChainAddEdgesLocalProcessor(env) { + req_ = ConsistUtil::parseAddRequest(val); + + uuid_ = ConsistUtil::strUUID(); + execDesc_ = ", ResumePrime. "; +} + +folly::SemiFuture ChainResumeAddPrimeProcessor::prepareLocal() { + VLOG(2) << uuid_ << " resume prime " << readableEdgeDesc_; + prepareRequest(req_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + auto spaceId = req_.get_space_id(); + auto numOfPart = env_->metaClient_->partsNum(spaceId); + if (!numOfPart.ok()) { + rcPrepare_ = Code::E_SPACE_NOT_FOUND; + return rcPrepare_; + } + auto& parts = req_.get_parts(); + auto& srcId = parts.begin()->second.back().get_key().get_src().getStr(); + auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); + localPartId_ = env_->metaClient_->partId(numOfPart.value(), srcId); + remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); + + return rcPrepare_; +} + +folly::SemiFuture ChainResumeAddPrimeProcessor::processRemote(Code code) { + VLOG(2) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); + return ChainAddEdgesLocalProcessor::processRemote(code); +} + +/** + * @brief this most import difference to ChainAddEdgesLocalProcessor is + * 
we can not abort, (delete an exist prime) + * @return folly::SemiFuture + */ +folly::SemiFuture ChainResumeAddPrimeProcessor::processLocal(Code) { + VLOG(2) << uuid_ << " processRemote() " << apache::thrift::util::enumNameSafe(rcRemote_); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } + + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + } + + if (rcRemote_ == Code::E_RPC_FAILURE) { + kvAppend_ = ChainAddEdgesLocalProcessor::makeDoublePrime(); + } + + if (rcRemote_ == Code::E_RPC_FAILURE || rcRemote_ == Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + erasePrime(); + return commit(); + } + + return rcRemote_; +} + +} // namespace storage +} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.h b/src/storage/transaction/ChainResumeAddPrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeAddEdgeRemoteProcessor.h rename to src/storage/transaction/ChainResumeAddPrimeProcessor.h index a9046814064..4f6251c3fb1 100644 --- a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.h +++ b/src/storage/transaction/ChainResumeAddPrimeProcessor.h @@ -10,22 +10,22 @@ namespace nebula { namespace storage { -class ResumeAddEdgeRemoteProcessor : public ChainAddEdgesLocalProcessor { +class ChainResumeAddPrimeProcessor : public ChainAddEdgesLocalProcessor { public: - static ResumeAddEdgeRemoteProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeAddEdgeRemoteProcessor(env, val); + static ChainResumeAddPrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeAddPrimeProcessor(env, val); } + virtual ~ChainResumeAddPrimeProcessor() = default; + folly::SemiFuture prepareLocal() override; folly::SemiFuture processRemote(nebula::cpp2::ErrorCode code) override; 
folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - virtual ~ResumeAddEdgeRemoteProcessor() = default; - protected: - ResumeAddEdgeRemoteProcessor(StorageEnv* env, const std::string& val); + ChainResumeAddPrimeProcessor(StorageEnv* env, const std::string& val); }; } // namespace storage diff --git a/src/storage/transaction/ChainResumeProcessor.cpp b/src/storage/transaction/ChainResumeProcessor.cpp deleted file mode 100644 index 4fad8f13749..00000000000 --- a/src/storage/transaction/ChainResumeProcessor.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ChainResumeProcessor.h" - -#include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainProcessorFactory.h" -#include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" -#include "storage/transaction/ConsistUtil.h" -#include "storage/transaction/TransactionManager.h" - -namespace nebula { -namespace storage { - -void ChainResumeProcessor::process() { - auto* table = env_->txnMan_->getDangleEdges(); - std::unique_ptr iter; - for (auto it = table->begin(); it != table->end(); ++it) { - auto spaceId = *reinterpret_cast(const_cast(it->first.c_str())); - auto edgeKey = std::string(it->first.c_str() + sizeof(GraphSpaceID), - it->first.size() - sizeof(GraphSpaceID)); - auto partId = NebulaKeyUtils::getPart(edgeKey); - auto prefix = (it->second == ResumeType::RESUME_CHAIN) ? 
ConsistUtil::primeTable() - : ConsistUtil::doublePrimeTable(); - auto key = prefix + edgeKey; - std::string val; - auto rc = env_->kvstore_->get(spaceId, partId, key, &val); - VLOG(1) << "resume edge space=" << spaceId << ", part=" << partId - << ", hex = " << folly::hexlify(edgeKey) - << ", rc = " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - // do nothing - } else if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - VLOG(1) << "kvstore->get() leader changed"; - auto getPart = env_->kvstore_->part(spaceId, partId); - if (nebula::ok(getPart) && !nebula::value(getPart)->isLeader()) { - // not leader any more, stop trying resume - env_->txnMan_->delPrime(spaceId, edgeKey); - } - continue; - } else if (rc == nebula::cpp2::ErrorCode::E_KEY_NOT_FOUND) { - // raft may rollback want we scanned. - env_->txnMan_->delPrime(spaceId, edgeKey); - } else { - LOG(WARNING) << "kvstore->get() failed, " << apache::thrift::util::enumNameSafe(rc); - continue; - } - - ResumeOptions opt(it->second, val); - auto* proc = ChainProcessorFactory::makeProcessor(env_, opt); - auto fut = proc->getFinished(); - env_->txnMan_->addChainTask(proc); - std::move(fut) - .thenValue([=](auto&& code) { - if (code == Code::SUCCEEDED) { - env_->txnMan_->delPrime(spaceId, edgeKey); - } else { - VLOG(1) << "recover failed: " << apache::thrift::util::enumNameSafe(rc); - } - }) - .get(); - } -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ChainResumeProcessor.h b/src/storage/transaction/ChainResumeProcessor.h deleted file mode 100644 index ac3572e319f..00000000000 --- a/src/storage/transaction/ChainResumeProcessor.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#pragma once - -#include "clients/storage/InternalStorageClient.h" -#include "common/utils/NebulaKeyUtils.h" -#include "storage/transaction/ChainAddEdgesLocalProcessor.h" -#include "storage/transaction/ChainBaseProcessor.h" -#include "storage/transaction/ChainUpdateEdgeLocalProcessor.h" -#include "storage/transaction/TransactionManager.h" - -namespace nebula { -namespace storage { - -class ChainResumeProcessor { - friend class ChainResumeProcessorTestHelper; - - public: - explicit ChainResumeProcessor(StorageEnv* env) : env_(env) {} - - void process(); - - private: - StorageEnv* env_{nullptr}; -}; - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp new file mode 100644 index 00000000000..0b9aede2e0e --- /dev/null +++ b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) 2021 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License. 
+ */ + +#include "storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h" + +#include + +namespace nebula { +namespace storage { + +ChainResumeUpdateDoublePrimeProcessor::ChainResumeUpdateDoublePrimeProcessor(StorageEnv* env, + const std::string& val) + : ChainUpdateEdgeLocalProcessor(env) { + req_ = ConsistUtil::parseUpdateRequest(val); + ChainUpdateEdgeLocalProcessor::prepareRequest(req_); +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::prepareLocal() { + VLOG(1) << " prepareLocal()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + return rcPrepare_; +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::processRemote(Code code) { + VLOG(1) << " prepareLocal(), code = " << apache::thrift::util::enumNameSafe(code); + return ChainUpdateEdgeLocalProcessor::processRemote(code); +} + +folly::SemiFuture ChainResumeUpdateDoublePrimeProcessor::processLocal(Code code) { + VLOG(1) << " processRemote(), code = " << apache::thrift::util::enumNameSafe(code); + + if (code != Code::SUCCEEDED) { + // if there are something wrong other than rpc failure + // we need to keep the resume retry(by not remove those prime key) + return code; + } + + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (currTerm.first != term_) { + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; + } + + auto key = ConsistUtil::doublePrime(spaceVidLen_, localPartId_, req_.get_edge_key()); + kvErased_.emplace_back(std::move(key)); + return abort(); +} + +void ChainResumeUpdateDoublePrimeProcessor::finish() { + VLOG(1) << " commitLocal()=" << apache::thrift::util::enumNameSafe(rcCommit_); + if (isKVStoreError(rcCommit_) || rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + } + + pushResultCode(rcCommit_, req_.get_part_id()); + finished_.setValue(rcCommit_); + onFinished(); +} + +} // namespace storage +} // namespace nebula diff --git 
a/src/storage/transaction/ResumeUpdateRemoteProcessor.h b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeUpdateRemoteProcessor.h rename to src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h index bb3171d061b..df4a6ba8bdf 100644 --- a/src/storage/transaction/ResumeUpdateRemoteProcessor.h +++ b/src/storage/transaction/ChainResumeUpdateDoublePrimeProcessor.h @@ -15,10 +15,10 @@ namespace storage { * if the TxnManager background resume thread found a prime key * it will create this processor to resume the complete update process */ -class ResumeUpdateRemoteProcessor : public ChainUpdateEdgeLocalProcessor { +class ChainResumeUpdateDoublePrimeProcessor : public ChainUpdateEdgeLocalProcessor { public: - static ResumeUpdateRemoteProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeUpdateRemoteProcessor(env, val); + static ChainResumeUpdateDoublePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeUpdateDoublePrimeProcessor(env, val); } folly::SemiFuture prepareLocal() override; @@ -29,10 +29,10 @@ class ResumeUpdateRemoteProcessor : public ChainUpdateEdgeLocalProcessor { void finish() override; - virtual ~ResumeUpdateRemoteProcessor() = default; + virtual ~ChainResumeUpdateDoublePrimeProcessor() = default; protected: - ResumeUpdateRemoteProcessor(StorageEnv* env, const std::string& val); + ChainResumeUpdateDoublePrimeProcessor(StorageEnv* env, const std::string& val); bool lockEdge(const cpp2::UpdateEdgeRequest& req); }; diff --git a/src/storage/transaction/ResumeUpdateProcessor.cpp b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp similarity index 55% rename from src/storage/transaction/ResumeUpdateProcessor.cpp rename to src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp index 075d0c10a2d..321d5cb3a9b 100644 --- a/src/storage/transaction/ResumeUpdateProcessor.cpp +++ 
b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.cpp @@ -3,7 +3,7 @@ * This source code is licensed under Apache 2.0 License. */ -#include "storage/transaction/ResumeUpdateProcessor.h" +#include "storage/transaction/ChainResumeUpdatePrimeProcessor.h" #include @@ -12,30 +12,34 @@ namespace nebula { namespace storage { -ResumeUpdateProcessor::ResumeUpdateProcessor(StorageEnv* env, const std::string& val) +ChainResumeUpdatePrimeProcessor::ChainResumeUpdatePrimeProcessor(StorageEnv* env, + const std::string& val) : ChainUpdateEdgeLocalProcessor(env) { req_ = ConsistUtil::parseUpdateRequest(val); ChainUpdateEdgeLocalProcessor::prepareRequest(req_); } -folly::SemiFuture ResumeUpdateProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - return code_; +folly::SemiFuture ChainResumeUpdatePrimeProcessor::prepareLocal() { + VLOG(1) << " prepareLocal()"; + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + return rcPrepare_; } -folly::SemiFuture ResumeUpdateProcessor::processRemote(Code code) { +folly::SemiFuture ChainResumeUpdatePrimeProcessor::processRemote(Code code) { VLOG(1) << "prepareLocal()=" << apache::thrift::util::enumNameSafe(code); return ChainUpdateEdgeLocalProcessor::processRemote(code); } -folly::SemiFuture ResumeUpdateProcessor::processLocal(Code code) { +folly::SemiFuture ChainResumeUpdatePrimeProcessor::processLocal(Code code) { VLOG(1) << "processRemote()=" << apache::thrift::util::enumNameSafe(code); - setErrorCode(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; + } - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } if (code == Code::E_RPC_FAILURE) { @@ 
-47,18 +51,11 @@ folly::SemiFuture ResumeUpdateProcessor::processLocal(Code code) { // we need to keep the resume retry(by not remove those prime key) auto key = ConsistUtil::primeKey(spaceVidLen_, localPartId_, req_.get_edge_key()); kvErased_.emplace_back(std::move(key)); - forwardToDelegateProcessor(); - return code_; + return commit(); } return code; } -void ResumeUpdateProcessor::finish() { - VLOG(1) << "commitLocal()=" << apache::thrift::util::enumNameSafe(code_); - finished_.setValue(code_); - onFinished(); -} - } // namespace storage } // namespace nebula diff --git a/src/storage/transaction/ResumeUpdateProcessor.h b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.h similarity index 67% rename from src/storage/transaction/ResumeUpdateProcessor.h rename to src/storage/transaction/ChainResumeUpdatePrimeProcessor.h index 557e351b4ed..bf13906f5bc 100644 --- a/src/storage/transaction/ResumeUpdateProcessor.h +++ b/src/storage/transaction/ChainResumeUpdatePrimeProcessor.h @@ -15,10 +15,10 @@ namespace storage { * if the TxnManager background resume thread found a prime key * it will create this processor to resume the complete update process */ -class ResumeUpdateProcessor : public ChainUpdateEdgeLocalProcessor { +class ChainResumeUpdatePrimeProcessor : public ChainUpdateEdgeLocalProcessor { public: - static ResumeUpdateProcessor* instance(StorageEnv* env, const std::string& val) { - return new ResumeUpdateProcessor(env, val); + static ChainResumeUpdatePrimeProcessor* instance(StorageEnv* env, const std::string& val) { + return new ChainResumeUpdatePrimeProcessor(env, val); } folly::SemiFuture prepareLocal() override; @@ -27,12 +27,10 @@ class ResumeUpdateProcessor : public ChainUpdateEdgeLocalProcessor { folly::SemiFuture processLocal(nebula::cpp2::ErrorCode code) override; - void finish() override; - - virtual ~ResumeUpdateProcessor() = default; + virtual ~ChainResumeUpdatePrimeProcessor() = default; protected: - ResumeUpdateProcessor(StorageEnv* env, 
const std::string& val); + ChainResumeUpdatePrimeProcessor(StorageEnv* env, const std::string& val); bool lockEdge(); }; diff --git a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp index d2246ecb002..36d4822a02b 100644 --- a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp +++ b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.cpp @@ -17,6 +17,7 @@ namespace storage { void ChainUpdateEdgeLocalProcessor::process(const cpp2::UpdateEdgeRequest& req) { if (!prepareRequest(req)) { + pushResultCode(rcPrepare_, localPartId_); onFinished(); } @@ -28,26 +29,21 @@ bool ChainUpdateEdgeLocalProcessor::prepareRequest(const cpp2::UpdateEdgeRequest spaceId_ = req.get_space_id(); localPartId_ = req_.get_part_id(); - auto rc = getSpaceVidLen(spaceId_); - if (rc != nebula::cpp2::ErrorCode::SUCCEEDED) { - pushResultCode(rc, localPartId_); + rcPrepare_ = getSpaceVidLen(spaceId_); + if (rcPrepare_ != nebula::cpp2::ErrorCode::SUCCEEDED) { return false; } - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { + std::tie(term_, rcPrepare_) = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); + if (rcPrepare_ != Code::SUCCEEDED) { return false; } return true; } -/** - * 1. set mem lock - * 2. 
set edge prime - * */ folly::SemiFuture ChainUpdateEdgeLocalProcessor::prepareLocal() { if (!setLock()) { - LOG(INFO) << "set lock failed, return E_WRITE_WRITE_CONFLICT"; + rcPrepare_ = Code::E_WRITE_WRITE_CONFLICT; return Code::E_WRITE_WRITE_CONFLICT; } @@ -61,18 +57,14 @@ folly::SemiFuture ChainUpdateEdgeLocalProcessor::prepareLocal() { auto c = folly::makePromiseContract(); env_->kvstore_->asyncMultiPut( spaceId_, localPartId_, std::move(data), [p = std::move(c.first), this](auto rc) mutable { - if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { - primeInserted_ = true; - } else { - VLOG(1) << "kvstore err: " << apache::thrift::util::enumNameSafe(rc); - } + rcPrepare_ = rc; p.setValue(rc); }); return std::move(c.second); } folly::SemiFuture ChainUpdateEdgeLocalProcessor::processRemote(Code code) { - LOG(INFO) << "prepareLocal()=" << apache::thrift::util::enumNameSafe(code); + VLOG(1) << " prepareLocal(): " << apache::thrift::util::enumNameSafe(code); if (code != Code::SUCCEEDED) { return code; } @@ -82,41 +74,78 @@ folly::SemiFuture ChainUpdateEdgeLocalProcessor::processRemote(Code code) } folly::SemiFuture ChainUpdateEdgeLocalProcessor::processLocal(Code code) { - LOG(INFO) << "processRemote(), code = " << apache::thrift::util::enumNameSafe(code); - if (code != Code::SUCCEEDED && code_ == Code::SUCCEEDED) { - code_ = code; + VLOG(1) << " processRemote(): " << apache::thrift::util::enumNameSafe(code); + if (rcPrepare_ != Code::SUCCEEDED) { + return rcPrepare_; } - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); + auto currTerm = env_->txnMan_->getTermFromKVStore(spaceId_, localPartId_); if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; + rcCommit_ = Code::E_LEADER_CHANGED; + return rcCommit_; } - if (code == Code::E_RPC_FAILURE) { + if (rcRemote_ == Code::E_RPC_FAILURE) { appendDoublePrime(); - addUnfinishedEdge(ResumeType::RESUME_REMOTE); } - if (code == 
Code::SUCCEEDED || code == Code::E_RPC_FAILURE) { - erasePrime(); - forwardToDelegateProcessor(); - } else { - if (primeInserted_) { - abort(); - } + erasePrime(); + + if (rcRemote_ != Code::SUCCEEDED && rcRemote_ != Code::E_RPC_FAILURE) { + // prepare succeed and remote failed + return abort(); } - return code_; + return commit(); +} + +void ChainUpdateEdgeLocalProcessor::finish() { + VLOG(1) << " commitLocal()=" << apache::thrift::util::enumNameSafe(rcCommit_); + do { + if (rcPrepare_ != Code::SUCCEEDED) { + break; + } + if (isKVStoreError(rcCommit_)) { + reportFailed(ResumeType::RESUME_CHAIN); + break; + } + if (rcRemote_ == Code::E_RPC_FAILURE) { + reportFailed(ResumeType::RESUME_REMOTE); + break; + } + } while (0); + + auto rc = Code::SUCCEEDED; + do { + if (rcPrepare_ != Code::SUCCEEDED) { + rc = rcPrepare_; + break; + } + + if (rcCommit_ != Code::SUCCEEDED) { + rc = rcCommit_; + break; + } + + if (rcRemote_ != Code::E_RPC_FAILURE) { + rc = rcRemote_; + break; + } + } while (0); + + pushResultCode(rc, req_.get_part_id()); + finished_.setValue(rc); + onFinished(); } void ChainUpdateEdgeLocalProcessor::doRpc(folly::Promise&& promise, int retry) noexcept { try { if (retry > retryLimit_) { - promise.setValue(Code::E_LEADER_CHANGED); + rcRemote_ = Code::E_LEADER_CHANGED; + promise.setValue(rcRemote_); return; } - auto* iClient = env_->txnMan_->getInternalClient(); + auto* iClient = env_->interClient_; folly::Promise p; auto reversedReq = reverseRequest(req_); @@ -124,17 +153,16 @@ void ChainUpdateEdgeLocalProcessor::doRpc(folly::Promise&& promise, int re iClient->chainUpdateEdge(reversedReq, term_, ver_, std::move(p)); std::move(f) .thenTry([=, p = std::move(promise)](auto&& t) mutable { - auto code = t.hasValue() ? t.value() : Code::E_RPC_FAILURE; - VLOG(1) << "code = " << apache::thrift::util::enumNameSafe(code); - switch (code) { + rcRemote_ = t.hasValue() ? 
t.value() : Code::E_RPC_FAILURE; + switch (rcRemote_) { case Code::E_LEADER_CHANGED: doRpc(std::move(p), ++retry); break; default: - p.setValue(code); + p.setValue(rcRemote_); break; } - return code; + return rcRemote_; }) .get(); } catch (std::exception& ex) { @@ -155,23 +183,27 @@ void ChainUpdateEdgeLocalProcessor::appendDoublePrime() { kvAppend_.emplace_back(std::make_pair(std::move(key), std::move(val))); } -void ChainUpdateEdgeLocalProcessor::forwardToDelegateProcessor() { - kUpdateEdgeCounters.init("update_edge"); +folly::SemiFuture ChainUpdateEdgeLocalProcessor::commit() { + VLOG(1) << __func__ << "()"; UpdateEdgeProcessor::ContextAdjuster fn = [=](EdgeContext& ctx) { ctx.kvAppend = std::move(kvAppend_); ctx.kvErased = std::move(kvErased_); }; + auto [pro, fut] = folly::makePromiseContract(); auto* proc = UpdateEdgeProcessor::instance(env_); proc->adjustContext(std::move(fn)); auto f = proc->getFuture(); + std::move(f).thenTry([&, p = std::move(pro)](auto&& t) mutable { + if (t.hasValue()) { + resp_ = std::move(t.value()); + rcCommit_ = getErrorCode(resp_); + } + p.setValue(rcCommit_); + }); + proc->process(req_); - auto resp = std::move(f).get(); - code_ = getErrorCode(resp); - if (code_ != Code::SUCCEEDED) { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - std::swap(resp_, resp); + return std::move(fut); } Code ChainUpdateEdgeLocalProcessor::checkAndBuildContexts(const cpp2::UpdateEdgeRequest&) { @@ -182,26 +214,21 @@ std::string ChainUpdateEdgeLocalProcessor::sEdgeKey(const cpp2::UpdateEdgeReques return ConsistUtil::edgeKey(spaceVidLen_, req.get_part_id(), req.get_edge_key()); } -void ChainUpdateEdgeLocalProcessor::finish() { - LOG(INFO) << "ChainUpdateEdgeLocalProcessor::finish()"; - pushResultCode(code_, req_.get_part_id()); - onFinished(); -} - -void ChainUpdateEdgeLocalProcessor::abort() { - auto key = ConsistUtil::primeKey(spaceVidLen_, localPartId_, req_.get_edge_key()); - kvErased_.emplace_back(std::move(key)); +folly::SemiFuture 
ChainUpdateEdgeLocalProcessor::abort() { + VLOG(1) << __func__ << "()"; + if (kvErased_.empty()) { + return Code::SUCCEEDED; + } - folly::Baton baton; - env_->kvstore_->asyncMultiRemove( - req_.get_space_id(), req_.get_part_id(), std::move(kvErased_), [&](auto rc) mutable { - LOG(INFO) << " abort()=" << apache::thrift::util::enumNameSafe(rc); - if (rc != Code::SUCCEEDED) { - addUnfinishedEdge(ResumeType::RESUME_CHAIN); - } - baton.post(); - }); - baton.wait(); + auto [pro, fut] = folly::makePromiseContract(); + env_->kvstore_->asyncMultiRemove(req_.get_space_id(), + req_.get_part_id(), + std::move(kvErased_), + [&, p = std::move(pro)](auto rc) mutable { + rcCommit_ = rc; + p.setValue(rc); + }); + return std::move(fut); } cpp2::UpdateEdgeRequest ChainUpdateEdgeLocalProcessor::reverseRequest( @@ -221,12 +248,12 @@ cpp2::UpdateEdgeRequest ChainUpdateEdgeLocalProcessor::reverseRequest( bool ChainUpdateEdgeLocalProcessor::setLock() { auto spaceId = req_.get_space_id(); - auto* lockCore = env_->txnMan_->getLockCore(spaceId, req_.get_part_id()); - if (lockCore == nullptr) { + lkCore_ = env_->txnMan_->getLockCore(spaceId, req_.get_part_id(), term_); + if (lkCore_ == nullptr) { return false; } auto key = ConsistUtil::edgeKey(spaceVidLen_, req_.get_part_id(), req_.get_edge_key()); - lk_ = std::make_unique>(lockCore, key); + lk_ = std::make_unique>(lkCore_.get(), key); return lk_->isLocked(); } @@ -240,13 +267,20 @@ nebula::cpp2::ErrorCode ChainUpdateEdgeLocalProcessor::getErrorCode( return parts.front().get_code(); } -void ChainUpdateEdgeLocalProcessor::addUnfinishedEdge(ResumeType type) { - LOG(INFO) << "addUnfinishedEdge()"; +void ChainUpdateEdgeLocalProcessor::reportFailed(ResumeType type) { + VLOG(1) << __func__ << "()"; if (lk_ != nullptr) { - lk_->forceUnlock(); + lk_->setAutoUnlock(false); } auto key = ConsistUtil::edgeKey(spaceVidLen_, req_.get_part_id(), req_.get_edge_key()); - env_->txnMan_->addPrime(spaceId_, key, type); + env_->txnMan_->addPrime(spaceId_, 
localPartId_, term_, key, type); +} + +bool ChainUpdateEdgeLocalProcessor::isKVStoreError(nebula::cpp2::ErrorCode code) { + auto iCode = static_cast(code); + auto kvStoreErrorCodeBegin = static_cast(nebula::cpp2::ErrorCode::E_RAFT_UNKNOWN_PART); + auto kvStoreErrorCodeEnd = static_cast(nebula::cpp2::ErrorCode::E_RAFT_ATOMIC_OP_FAILED); + return iCode >= kvStoreErrorCodeBegin && iCode <= kvStoreErrorCodeEnd; } } // namespace storage diff --git a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h index 2f84f343a83..d4b1a9af0f9 100644 --- a/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h +++ b/src/storage/transaction/ChainUpdateEdgeLocalProcessor.h @@ -49,9 +49,9 @@ class ChainUpdateEdgeLocalProcessor void doRpc(folly::Promise&& promise, int retry = 0) noexcept; - folly::SemiFuture processNormalLocal(Code code); + folly::SemiFuture commit(); - void abort(); + folly::SemiFuture abort(); bool prepareRequest(const cpp2::UpdateEdgeRequest& req); @@ -59,15 +59,13 @@ class ChainUpdateEdgeLocalProcessor void appendDoublePrime(); - void forwardToDelegateProcessor(); - std::string sEdgeKey(const cpp2::UpdateEdgeRequest& req); cpp2::UpdateEdgeRequest reverseRequest(const cpp2::UpdateEdgeRequest& req); bool setLock(); - void addUnfinishedEdge(ResumeType type); + void reportFailed(ResumeType type); int64_t getVersion(const cpp2::UpdateEdgeRequest& req); @@ -75,8 +73,11 @@ class ChainUpdateEdgeLocalProcessor Code checkAndBuildContexts(const cpp2::UpdateEdgeRequest& req) override; + bool isKVStoreError(nebula::cpp2::ErrorCode code); + protected: cpp2::UpdateEdgeRequest req_; + TransactionManager::SPtrLock lkCore_; std::unique_ptr lk_; PartitionID localPartId_; int retryLimit_{10}; diff --git a/src/storage/transaction/ConsistUtil.cpp b/src/storage/transaction/ConsistUtil.cpp index d80d288b2f7..a0923d84a96 100644 --- a/src/storage/transaction/ConsistUtil.cpp +++ b/src/storage/transaction/ConsistUtil.cpp @@ -12,50 
+12,54 @@ #include "common/utils/NebulaKeyUtils.h" namespace nebula { namespace storage { - -static const std::string kPrimeTable{"__prime__"}; // NOLINT -static const std::string kDoublePrimeTable{"__prime_prime__"}; // NOLINT - -std::string ConsistUtil::primeTable() { - return kPrimeTable; +std::string ConsistUtil::primeTable(PartitionID partId) { + auto item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kPrime); + std::string key; + key.reserve(sizeof(PartitionID)); + key.append(reinterpret_cast(&item), sizeof(PartitionID)); + return key; } -std::string ConsistUtil::doublePrimeTable() { - return kDoublePrimeTable; +std::string ConsistUtil::doublePrimeTable(PartitionID partId) { + auto item = (partId << kPartitionOffset) | static_cast(NebulaKeyType::kDoublePrime); + std::string key; + key.reserve(sizeof(PartitionID)); + key.append(reinterpret_cast(&item), sizeof(PartitionID)); + return key; } std::string ConsistUtil::primePrefix(PartitionID partId) { - return kPrimeTable + NebulaKeyUtils::edgePrefix(partId); + return primeTable(partId) + NebulaKeyUtils::edgePrefix(partId); } std::string ConsistUtil::doublePrimePrefix(PartitionID partId) { - return kDoublePrimeTable + NebulaKeyUtils::edgePrefix(partId); + return doublePrimeTable(partId) + NebulaKeyUtils::edgePrefix(partId); } std::string ConsistUtil::primeKey(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& edgeKey) { - return kPrimeTable + NebulaKeyUtils::edgeKey(vIdLen, - partId, - edgeKey.get_src().getStr(), - edgeKey.get_edge_type(), - edgeKey.get_ranking(), - edgeKey.get_dst().getStr()); + return primeTable(partId) + NebulaKeyUtils::edgeKey(vIdLen, + partId, + edgeKey.get_src().getStr(), + edgeKey.get_edge_type(), + edgeKey.get_ranking(), + edgeKey.get_dst().getStr()); } folly::StringPiece ConsistUtil::edgeKeyFromPrime(const folly::StringPiece& key) { - return folly::StringPiece(key.begin() + kPrimeTable.size(), key.end()); + return folly::StringPiece(key.begin() + sizeof(PartitionID), 
key.end()); } folly::StringPiece ConsistUtil::edgeKeyFromDoublePrime(const folly::StringPiece& key) { - return folly::StringPiece(key.begin() + kDoublePrimeTable.size(), key.end()); + return folly::StringPiece(key.begin() + sizeof(PartitionID), key.end()); } std::string ConsistUtil::doublePrime(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& key) { - return kDoublePrimeTable + NebulaKeyUtils::edgeKey(vIdLen, - partId, - key.get_src().getStr(), - key.get_edge_type(), - key.get_ranking(), - key.get_dst().getStr()); + return doublePrimeTable(partId) + NebulaKeyUtils::edgeKey(vIdLen, + partId, + key.get_src().getStr(), + key.get_edge_type(), + key.get_ranking(), + key.get_dst().getStr()); } RequestType ConsistUtil::parseType(folly::StringPiece val) { @@ -123,7 +127,6 @@ void ConsistUtil::reverseEdgeKeyInplace(cpp2::EdgeKey& edgeKey) { } int64_t ConsistUtil::toInt(const ::nebula::Value& val) { - // return ConsistUtil::toInt2(val.toString()); auto str = val.toString(); if (str.size() < 3) { return 0; @@ -131,19 +134,19 @@ int64_t ConsistUtil::toInt(const ::nebula::Value& val) { return *reinterpret_cast(const_cast(str.data() + 1)); } -int64_t ConsistUtil::toInt2(const std::string& str) { - if (str.size() < 8) { - return 0; - } - return *reinterpret_cast(const_cast(str.data())); -} - -std::string ConsistUtil::readableKey(size_t vidLen, const std::string& rawKey) { +std::string ConsistUtil::readableKey(size_t vidLen, bool isIntVid, const std::string& rawKey) { auto src = NebulaKeyUtils::getSrcId(vidLen, rawKey); auto dst = NebulaKeyUtils::getDstId(vidLen, rawKey); auto rank = NebulaKeyUtils::getRank(vidLen, rawKey); std::stringstream ss; - ss << ConsistUtil::toInt2(src.str()) << "->" << ConsistUtil::toInt2(dst.str()) << "@" << rank; + ss << std::boolalpha << "isIntVid=" << isIntVid << ", "; + if (isIntVid) { + ss << *reinterpret_cast(const_cast(src.begin())) << "--" + << *reinterpret_cast(const_cast(dst.begin())); + } else { + ss << src.str() << "--" << dst.str(); + 
} + ss << "@" << rank; return ss.str(); } @@ -181,12 +184,14 @@ cpp2::DeleteEdgesRequest DeleteEdgesRequestHelper::parseDeleteEdgesRequest(const return req; } -std::string DeleteEdgesRequestHelper::explain(const cpp2::DeleteEdgesRequest& req) { +std::string DeleteEdgesRequestHelper::explain(const cpp2::DeleteEdgesRequest& req, bool isIntVid) { std::stringstream oss; for (auto& partOfKeys : req.get_parts()) { for (auto& key : partOfKeys.second) { - oss << ConsistUtil::toInt(key.get_src()) << "->" << ConsistUtil::toInt(key.get_dst()) << "@" - << key.get_ranking() << ", "; + if (isIntVid) { + oss << ConsistUtil::toInt(key.get_src()) << "->" << ConsistUtil::toInt(key.get_dst()) << "@" + << key.get_ranking() << ", "; + } } } return oss.str(); diff --git a/src/storage/transaction/ConsistUtil.h b/src/storage/transaction/ConsistUtil.h index 0ca2fc918d5..48f9aae1fb6 100644 --- a/src/storage/transaction/ConsistUtil.h +++ b/src/storage/transaction/ConsistUtil.h @@ -17,13 +17,9 @@ namespace nebula { namespace storage { class ConsistUtil final { public: - static std::string primeTable(); + static std::string primeTable(PartitionID partId); - static std::string doublePrimeTable(); - - static std::string deletePrimeTable(); - - static std::string deleteDoublePrimeTable(); + static std::string doublePrimeTable(PartitionID partId); static std::string edgeKey(size_t vIdLen, PartitionID partId, const cpp2::EdgeKey& key); @@ -87,9 +83,7 @@ class ConsistUtil final { */ static int64_t toInt(const ::nebula::Value& val); - static int64_t toInt2(const std::string& val); - - static std::string readableKey(size_t vidLen, const std::string& rawKey); + static std::string readableKey(size_t vidLen, bool isIntId, const std::string& rawKey); static std::vector toStrKeys(const cpp2::DeleteEdgesRequest& req, int vidLen); @@ -104,7 +98,7 @@ struct DeleteEdgesRequestHelper final { static cpp2::DeleteEdgesRequest parseDeleteEdgesRequest(const std::string& val); - static std::string explain(const 
cpp2::DeleteEdgesRequest& req); + static std::string explain(const cpp2::DeleteEdgesRequest& req, bool isIntVid); }; } // namespace storage diff --git a/src/storage/transaction/ResumeAddEdgeProcessor.cpp b/src/storage/transaction/ResumeAddEdgeProcessor.cpp deleted file mode 100644 index 3ca1bfb18c5..00000000000 --- a/src/storage/transaction/ResumeAddEdgeProcessor.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ResumeAddEdgeProcessor.h" - -namespace nebula { -namespace storage { - -ResumeAddEdgeProcessor::ResumeAddEdgeProcessor(StorageEnv* env, const std::string& val) - : ChainAddEdgesLocalProcessor(env) { - req_ = ConsistUtil::parseAddRequest(val); - - uuid_ = ConsistUtil::strUUID(); - readableEdgeDesc_ = makeReadableEdge(req_); - VLOG(1) << uuid_ << " resume prime " << readableEdgeDesc_; - ChainAddEdgesLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeAddEdgeProcessor::prepareLocal() { - if (code_ != Code::SUCCEEDED) { - return code_; - } - auto spaceId = req_.get_space_id(); - auto numOfPart = env_->metaClient_->partsNum(spaceId); - if (!numOfPart.ok()) { - return Code::E_SPACE_NOT_FOUND; - } - auto& parts = req_.get_parts(); - auto& srcId = parts.begin()->second.back().get_key().get_src().getStr(); - auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); - localPartId_ = env_->metaClient_->partId(numOfPart.value(), srcId); - remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); - - return code_; -} - -folly::SemiFuture ResumeAddEdgeProcessor::processRemote(Code code) { - VLOG(1) << uuid_ << " prepareLocal() " << apache::thrift::util::enumNameSafe(code); - return ChainAddEdgesLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeAddEdgeProcessor::processLocal(Code code) { - VLOG(1) << uuid_ << " processRemote() " << apache::thrift::util::enumNameSafe(code); - 
setErrorCode(code); - - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::E_RPC_FAILURE) { - kvAppend_ = ChainAddEdgesLocalProcessor::makeDoublePrime(); - } - - if (code == Code::E_RPC_FAILURE || code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - erasePrime(); - code_ = forwardToDelegateProcessor().get(); - return code_; - } - - return code; -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp b/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp deleted file mode 100644 index 21259f74afa..00000000000 --- a/src/storage/transaction/ResumeAddEdgeRemoteProcessor.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. 
- */ - -#include "storage/transaction/ResumeAddEdgeRemoteProcessor.h" - -namespace nebula { -namespace storage { - -ResumeAddEdgeRemoteProcessor::ResumeAddEdgeRemoteProcessor(StorageEnv* env, const std::string& val) - : ChainAddEdgesLocalProcessor(env) { - req_ = ConsistUtil::parseAddRequest(val); - ChainAddEdgesLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (code_ != Code::SUCCEEDED) { - return code_; - } - - auto spaceId = req_.get_space_id(); - auto numOfPart = env_->metaClient_->partsNum(spaceId); - if (!numOfPart.ok()) { - return Code::E_SPACE_NOT_FOUND; - } - auto& parts = req_.get_parts(); - auto& dstId = parts.begin()->second.back().get_key().get_dst().getStr(); - remotePartId_ = env_->metaClient_->partId(numOfPart.value(), dstId); - - return Code::SUCCEEDED; -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::processRemote(Code code) { - return ChainAddEdgesLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeAddEdgeRemoteProcessor::processLocal(Code code) { - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::E_OUTDATED_TERM) { - // E_OUTDATED_TERM indicate this host is no longer the leader of curr part - // any following kv operation will fail - // due to not allowed to write from follower - return code; - } - - if (code == Code::E_RPC_FAILURE) { - // nothing to do, as we are already an rpc failure - } - - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - ChainAddEdgesLocalProcessor::eraseDoublePrime(); - code_ = forwardToDelegateProcessor().get(); - return code_; - } - - return code; -} - -} // namespace storage -} // 
namespace nebula diff --git a/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp b/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp deleted file mode 100644 index 5bfa6ed2a65..00000000000 --- a/src/storage/transaction/ResumeUpdateRemoteProcessor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (c) 2021 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License. - */ - -#include "storage/transaction/ResumeUpdateRemoteProcessor.h" - -#include - -namespace nebula { -namespace storage { - -ResumeUpdateRemoteProcessor::ResumeUpdateRemoteProcessor(StorageEnv* env, const std::string& val) - : ChainUpdateEdgeLocalProcessor(env) { - req_ = ConsistUtil::parseUpdateRequest(val); - ChainUpdateEdgeLocalProcessor::prepareRequest(req_); -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::prepareLocal() { - std::tie(term_, code_) = env_->txnMan_->getTerm(spaceId_, localPartId_); - return code_; -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::processRemote(Code code) { - return ChainUpdateEdgeLocalProcessor::processRemote(code); -} - -folly::SemiFuture ResumeUpdateRemoteProcessor::processLocal(Code code) { - setErrorCode(code); - - auto currTerm = env_->txnMan_->getTerm(spaceId_, localPartId_); - if (currTerm.first != term_) { - LOG(WARNING) << "E_LEADER_CHANGED during prepare and commit local"; - code_ = Code::E_LEADER_CHANGED; - } - - if (code == Code::SUCCEEDED) { - // if there are something wrong other than rpc failure - // we need to keep the resume retry(by not remove those prime key) - auto key = ConsistUtil::doublePrime(spaceVidLen_, localPartId_, req_.get_edge_key()); - kvErased_.emplace_back(std::move(key)); - forwardToDelegateProcessor(); - return code; - } else { - // we can't decide if the double prime should be deleted. 
- // so do nothing - } - - return code; -} - -void ResumeUpdateRemoteProcessor::finish() { - if (FLAGS_trace_toss) { - VLOG(1) << "commitLocal()=" << apache::thrift::util::enumNameSafe(code_); - } - finished_.setValue(code_); - onFinished(); -} - -} // namespace storage -} // namespace nebula diff --git a/src/storage/transaction/TransactionManager.cpp b/src/storage/transaction/TransactionManager.cpp index 2c91b20b8de..f44dfd9e469 100644 --- a/src/storage/transaction/TransactionManager.cpp +++ b/src/storage/transaction/TransactionManager.cpp @@ -12,47 +12,106 @@ #include "kvstore/NebulaStore.h" #include "storage/CommonUtils.h" #include "storage/StorageFlags.h" -#include "storage/transaction/ChainResumeProcessor.h" +#include "storage/transaction/ChainProcessorFactory.h" namespace nebula { namespace storage { DEFINE_int32(resume_interval_secs, 10, "Resume interval"); - -ProcessorCounters kForwardTranxCounters; +DEFINE_int32(toss_worker_num, 16, "Number of TOSS worker threads"); TransactionManager::TransactionManager(StorageEnv* env) : env_(env) { LOG(INFO) << "TransactionManager ctor()"; - exec_ = std::make_shared(10); - iClient_ = env_->interClient_; - resumeThread_ = std::make_unique(); + exec_ = std::make_shared(FLAGS_toss_worker_num); +} + +bool TransactionManager::start() { std::vector> existParts; auto fn = std::bind(&TransactionManager::onNewPartAdded, this, std::placeholders::_1); static_cast<::nebula::kvstore::NebulaStore*>(env_->kvstore_) ->registerOnNewPartAdded("TransactionManager", fn, existParts); - for (auto& partOfSpace : existParts) { - scanPrimes(partOfSpace.first, partOfSpace.second); + for (auto&& [spaceId, partId] : existParts) { + auto [termId, rc] = getTermFromKVStore(spaceId, partId); + if (rc != Code::SUCCEEDED) { + continue; + } + scanPrimes(spaceId, partId, termId); } + return true; +} + +void TransactionManager::monitorPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg) { + monPoolStats_.emplace_back(std::make_pair(pool, msg)); }
-TransactionManager::LockCore* TransactionManager::getLockCore(GraphSpaceID spaceId, - GraphSpaceID partId, - bool checkWhiteList) { +void TransactionManager::bgPrintPoolStat() { + while (!stop_) { + for (auto&& [pool, msg] : monPoolStats_) { + VLOG(1) << dumpPoolStat(pool, msg); + } + std::this_thread::sleep_for(std::chrono::seconds(20)); + } +} + +std::string TransactionManager::dumpPoolStat(folly::ThreadPoolExecutor* exec, + const std::string& msg) { + auto stats = exec->getPoolStats(); + std::stringstream oss; + oss << "\npoolStats: " << msg << "\n\t threadCount = " << stats.threadCount + << "\n\t idleThreadCount = " << stats.idleThreadCount + << "\n\t activeThreadCount = " << stats.activeThreadCount + << "\n\t pendingTaskCount = " << stats.pendingTaskCount + << "\n\t totalTaskCount = " << stats.totalTaskCount << "\n"; + return oss.str(); +} + +void TransactionManager::stop() { + LOG(INFO) << "TransactionManager stop()"; + stop_ = true; +} + +void TransactionManager::join() { + LOG(INFO) << "TransactionManager join()"; + exec_->stop(); +} + +void TransactionManager::addChainTask(ChainBaseProcessor* proc) { + if (stop_) { + return; + } + folly::via(exec_.get()) + .thenValue([=](auto&&) { return proc->prepareLocal(); }) + .thenValue([=](auto&& code) { return proc->processRemote(code); }) + .thenValue([=](auto&& code) { return proc->processLocal(code); }) + .ensure([=]() { proc->finish(); }); +} + +TransactionManager::SPtrLock TransactionManager::getLockCore(GraphSpaceID spaceId, + GraphSpaceID partId, + TermID termId, + bool checkWhiteList) { if (checkWhiteList) { - if (scannedParts_.find(std::make_pair(spaceId, partId)) == scannedParts_.end()) { + auto currTermKey = std::make_pair(spaceId, partId); + auto it = currTerm_.find(currTermKey); + if (it == currTerm_.end()) { + return nullptr; + } + if (it->second != termId) { return nullptr; } } - auto it = memLocks_.find(spaceId); + MemLockKey key = std::make_tuple(spaceId, partId, termId); + auto it = 
memLocks_.find(key); if (it != memLocks_.end()) { - return it->second.get(); + return it->second; } - auto item = memLocks_.insert(spaceId, std::make_unique()); - return item.first->second.get(); + auto item = memLocks_.insert(key, std::make_shared()); + return item.first->second; } -std::pair TransactionManager::getTerm(GraphSpaceID spaceId, PartitionID partId) { +std::pair TransactionManager::getTermFromKVStore(GraphSpaceID spaceId, + PartitionID partId) { TermID termId = -1; auto rc = Code::SUCCEEDED; auto part = env_->kvstore_->part(spaceId, partId); @@ -67,13 +126,13 @@ std::pair TransactionManager::getTerm(GraphSpaceID spaceId, Partit bool TransactionManager::checkTermFromCache(GraphSpaceID spaceId, PartitionID partId, TermID termId) { - auto termOfMeta = env_->metaClient_->getTermFromCache(spaceId, partId); - if (termOfMeta.ok()) { - if (termId < termOfMeta.value()) { + auto termFromMeta = env_->metaClient_->getTermFromCache(spaceId, partId); + if (termFromMeta.ok()) { + if (termId < termFromMeta.value()) { LOG(WARNING) << "checkTerm() failed: " << "spaceId=" << spaceId << ", partId=" << partId << ", in-coming term=" << termId - << ", term in meta cache=" << termOfMeta.value(); + << ", term in meta cache=" << termFromMeta.value(); return false; } } @@ -89,75 +148,35 @@ bool TransactionManager::checkTermFromCache(GraphSpaceID spaceId, return true; } -void TransactionManager::resumeThread() { - SCOPE_EXIT { - resumeThread_->addDelayTask( - FLAGS_resume_interval_secs * 1000, &TransactionManager::resumeThread, this); - }; - ChainResumeProcessor proc(env_); - proc.process(); -} - -bool TransactionManager::start() { - if (!resumeThread_->start()) { - LOG(ERROR) << "resume thread start failed"; - return false; - } - resumeThread_->addDelayTask( - FLAGS_resume_interval_secs * 1000, &TransactionManager::resumeThread, this); - return true; -} - -void TransactionManager::stop() { - exec_->stop(); - resumeThread_->stop(); - resumeThread_->wait(); -} - -std::string 
TransactionManager::makeLockKey(GraphSpaceID spaceId, const std::string& edge) { - std::string lockKey; - lockKey.append(reinterpret_cast(&spaceId), sizeof(GraphSpaceID)).append(edge); - return lockKey; -} - -std::string TransactionManager::getEdgeKey(const std::string& lockKey) { - std::string edgeKey(lockKey.c_str() + sizeof(GraphSpaceID)); - return edgeKey; -} - -void TransactionManager::addPrime(GraphSpaceID spaceId, const std::string& edge, ResumeType type) { - VLOG(1) << "addPrime() space=" << spaceId << ", hex=" << folly::hexlify(edge) +void TransactionManager::addPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& egKey, + ResumeType type) { + VLOG(2) << "addPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey) << ", ResumeType=" << static_cast(type); - auto key = makeLockKey(spaceId, edge); - dangleEdges_.insert(std::make_pair(key, type)); -} - -void TransactionManager::delPrime(GraphSpaceID spaceId, const std::string& edge) { - VLOG(1) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(edge) << ", readable " - << ConsistUtil::readableKey(8, edge); - auto key = makeLockKey(spaceId, edge); - dangleEdges_.erase(key); - - auto partId = NebulaKeyUtils::getPart(edge); - auto* lk = getLockCore(spaceId, partId, false); - lk->unlock(edge); -} - -void TransactionManager::scanAll() { - LOG(INFO) << "scanAll()"; - std::unordered_map> leaders; - if (env_->kvstore_->allLeader(leaders) == 0) { - LOG(INFO) << "no leader found, skip any resume process"; + auto* proc = ChainProcessorFactory::make(env_, spaceId, termId, egKey, type); + if (proc == nullptr) { + VLOG(1) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey); + auto lk = getLockCore(spaceId, partId, termId, false); + if (lk) { + lk->unlock(egKey); + } + // delPrime(spaceId, partId, termId, egKey); return; } - for (auto& leader : leaders) { - auto spaceId = leader.first; - for (auto& partInfo : leader.second) { - auto partId = 
partInfo.get_part_id(); - scanPrimes(spaceId, partId); + auto fut = proc->getFinished(); + std::move(fut).thenValue([=](auto&& code) { + if (code == Code::SUCCEEDED) { + VLOG(2) << "delPrime() space=" << spaceId << ", hex=" << folly::hexlify(egKey); + auto lk = getLockCore(spaceId, partId, termId, false); + if (lk) { + lk->unlock(egKey); + } + // env_->txnMan_->delPrime(spaceId, partId, termId, egKey); } - } - LOG(INFO) << "finish scanAll()"; + }); + addChainTask(proc); } void TransactionManager::onNewPartAdded(std::shared_ptr& part) { @@ -175,75 +194,73 @@ void TransactionManager::onLeaderLostWrapper(const ::nebula::kvstore::Part::Call opt.spaceId, opt.partId, opt.term); - scannedParts_.erase(std::make_pair(opt.spaceId, opt.partId)); - dangleEdges_.clear(); + auto currTermKey = std::make_pair(opt.spaceId, opt.partId); + auto currTermIter = currTerm_.find(currTermKey); + if (currTermIter == currTerm_.end()) { + return; + } + auto memLockKey = std::make_tuple(opt.spaceId, opt.partId, currTermIter->second); + memLocks_.erase(memLockKey); } void TransactionManager::onLeaderElectedWrapper( const ::nebula::kvstore::Part::CallbackOptions& opt) { LOG(INFO) << folly::sformat( "leader get do scanPrimes space={}, part={}, term={}", opt.spaceId, opt.partId, opt.term); - scanPrimes(opt.spaceId, opt.partId); + scanPrimes(opt.spaceId, opt.partId, opt.term); } -void TransactionManager::scanPrimes(GraphSpaceID spaceId, PartitionID partId) { - LOG(INFO) << folly::sformat("{}(), spaceId={}, partId={}", __func__, spaceId, partId); +void TransactionManager::scanPrimes(GraphSpaceID spaceId, PartitionID partId, TermID termId) { + LOG(INFO) << folly::sformat( + "{}(), space={}, part={}, term={}", __func__, spaceId, partId, termId); std::unique_ptr iter; auto prefix = ConsistUtil::primePrefix(partId); auto rc = env_->kvstore_->prefix(spaceId, partId, prefix, &iter); if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { for (; iter->valid(); iter->next()) { - auto edgeKey = 
ConsistUtil::edgeKeyFromPrime(iter->key()); - VLOG(1) << "scanned edgekey: " << folly::hexlify(edgeKey) - << ", readable: " << ConsistUtil::readableKey(8, edgeKey.str()); - auto lockKey = makeLockKey(spaceId, edgeKey.str()); - auto insSucceed = dangleEdges_.insert(std::make_pair(lockKey, ResumeType::RESUME_CHAIN)); - if (!insSucceed.second) { - LOG(ERROR) << "not supposed to insert fail: " << folly::hexlify(edgeKey); - } - auto* lk = getLockCore(spaceId, partId, false); - auto succeed = lk->try_lock(edgeKey.str()); + auto edgeKey = ConsistUtil::edgeKeyFromPrime(iter->key()).str(); + VLOG(1) << "scanned prime edge: " << folly::hexlify(edgeKey); + auto lk = getLockCore(spaceId, partId, termId, false); + auto succeed = lk->try_lock(edgeKey); if (!succeed) { - LOG(ERROR) << "not supposed to lock fail: " << folly::hexlify(edgeKey); + LOG(ERROR) << "not supposed to lock fail: " + << ", spaceId " << spaceId << ", partId " << partId << ", termId " << termId + << folly::hexlify(edgeKey); } + addPrime(spaceId, partId, termId, edgeKey, ResumeType::RESUME_CHAIN); } } else { VLOG(1) << "primePrefix() " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - return; - } } prefix = ConsistUtil::doublePrimePrefix(partId); rc = env_->kvstore_->prefix(spaceId, partId, prefix, &iter); if (rc == nebula::cpp2::ErrorCode::SUCCEEDED) { for (; iter->valid(); iter->next()) { - auto edgeKey = ConsistUtil::edgeKeyFromDoublePrime(iter->key()); - auto lockKey = makeLockKey(spaceId, edgeKey.str()); - auto insSucceed = dangleEdges_.insert(std::make_pair(lockKey, ResumeType::RESUME_REMOTE)); - if (!insSucceed.second) { - LOG(ERROR) << "not supposed to insert fail: " << folly::hexlify(edgeKey); - } - auto* lk = getLockCore(spaceId, partId, false); - auto succeed = lk->try_lock(edgeKey.str()); + auto edgeKey = ConsistUtil::edgeKeyFromDoublePrime(iter->key()).str(); + VLOG(1) << "scanned double prime edge: " << folly::hexlify(edgeKey); + auto lk = 
getLockCore(spaceId, partId, termId, false); + auto succeed = lk->try_lock(edgeKey); if (!succeed) { - LOG(ERROR) << "not supposed to lock fail: " << folly::hexlify(edgeKey); + LOG(ERROR) << "not supposed to lock fail: " + << ", space " << spaceId << ", partId " << partId << ", termId " << termId + << folly::hexlify(edgeKey); } + addPrime(spaceId, partId, termId, edgeKey, ResumeType::RESUME_REMOTE); } } else { VLOG(1) << "doublePrimePrefix() " << apache::thrift::util::enumNameSafe(rc); - if (rc == nebula::cpp2::ErrorCode::E_LEADER_CHANGED) { - return; - } } - auto partOfSpace = std::make_pair(spaceId, partId); - auto insRet = scannedParts_.insert(std::make_pair(partOfSpace, 0)); - LOG(INFO) << "insert space=" << spaceId << ", part=" << partId - << ", into white list suc=" << std::boolalpha << insRet.second; + + auto currTermKey = std::make_pair(spaceId, partId); + currTerm_.insert_or_assign(currTermKey, termId); + + LOG(INFO) << "set curr term spaceId = " << spaceId << ", partId = " << partId + << ", termId = " << termId; } -folly::ConcurrentHashMap* TransactionManager::getDangleEdges() { - return &dangleEdges_; +folly::EventBase* TransactionManager::getEventBase() { + return exec_->getEventBase(); } } // namespace storage diff --git a/src/storage/transaction/TransactionManager.h b/src/storage/transaction/TransactionManager.h index acfc2517506..f94156b92e6 100644 --- a/src/storage/transaction/TransactionManager.h +++ b/src/storage/transaction/TransactionManager.h @@ -26,100 +26,125 @@ class TransactionManager { public: FRIEND_TEST(ChainUpdateEdgeTest, updateTest1); friend class FakeInternalStorageClient; + friend class TransactionManagerTester; using LockGuard = MemoryLockGuard; using LockCore = MemoryLockCore; using UPtrLock = std::unique_ptr; + using SPtrLock = std::shared_ptr; public: explicit TransactionManager(storage::StorageEnv* env); ~TransactionManager() { stop(); - } - - void addChainTask(ChainBaseProcessor* proc) { - folly::async([=] { - 
proc->prepareLocal() - .via(exec_.get()) - .thenValue([=](auto&& code) { return proc->processRemote(code); }) - .thenValue([=](auto&& code) { return proc->processLocal(code); }) - .ensure([=]() { proc->finish(); }); - }); - } - - folly::Executor* getExecutor() { - return exec_.get(); + join(); } bool start(); void stop(); - LockCore* getLockCore(GraphSpaceID spaceId, PartitionID partId, bool checkWhiteList = true); + /** + * @brief wait until stop + */ + void join(); - InternalStorageClient* getInternalClient() { - return iClient_; - } + /** + * @brief add a new processor to do "chain" work, + * using the internal executor of transaction manager. + * + * @param proc + */ + void addChainTask(ChainBaseProcessor* proc); + + /** + * @brief Get the Lock Core object to set a memory lock for a key. + * + * @param spaceId + * @param partId + * @param termId + * @param checkWhiteList caller outside TransactionManager have to set this true. + * @return nullptr if failed. + */ + SPtrLock getLockCore(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + bool checkWhiteList = true); // get term of part from kvstore, may fail if this part is not exist - std::pair getTerm(GraphSpaceID spaceId, PartitionID partId); + std::pair getTermFromKVStore(GraphSpaceID spaceId, + PartitionID partId); // check get term from local term cache // this is used by Chain...RemoteProcessor, // to avoid an old leader request overrider a newer leader's bool checkTermFromCache(GraphSpaceID spaceId, PartitionID partId, TermID termId); - void reportFailed(); - // leave a record for (double)prime edge, to let resume processor there is one dangling edge - void addPrime(GraphSpaceID spaceId, const std::string& edgeKey, ResumeType type); + void addPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& edgeKey, + ResumeType type); + + // delete a prime record when recover succeeded. 
+ void delPrime(GraphSpaceID spaceId, + PartitionID partId, + TermID termId, + const std::string& edgeKey); - void delPrime(GraphSpaceID spaceId, const std::string& edgeKey); - - bool checkUnfinishedEdge(GraphSpaceID spaceId, const folly::StringPiece& key); + /** + * @brief need to do a scan to let all prime(double prime) set a memory lock, + * before a partition start to serve. + * otherwise, if a new request comes, it will overwrite the existing lock. + * @param spaceId + * @param partId + */ + void scanPrimes(GraphSpaceID spaceId, PartitionID partId, TermID termId); - folly::ConcurrentHashMap* getDangleEdges(); + /** + * @brief Get the an Event Base object from its internal executor + * + * @return folly::EventBase* + */ + folly::EventBase* getEventBase(); - void scanPrimes(GraphSpaceID spaceId, PartitionID partId); + /** + * @brief stat thread, used for debug + */ + void monitorPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg); + void bgPrintPoolStat(); + std::string dumpPoolStat(folly::ThreadPoolExecutor* pool, const std::string& msg); - void scanAll(); + bool stop_{false}; + std::vector> monPoolStats_; protected: - void resumeThread(); - - std::string makeLockKey(GraphSpaceID spaceId, const std::string& edge); - - std::string getEdgeKey(const std::string& lockKey); - // this is a callback register to NebulaStore on new part added. 
void onNewPartAdded(std::shared_ptr& part); // this is a callback register to Part::onElected void onLeaderElectedWrapper(const ::nebula::kvstore::Part::CallbackOptions& options); + // this is a callback register to Part::onLostLeadership void onLeaderLostWrapper(const ::nebula::kvstore::Part::CallbackOptions& options); protected: - using PartUUID = std::pair; + using SpacePart = std::pair; StorageEnv* env_{nullptr}; std::shared_ptr exec_; - InternalStorageClient* iClient_; - folly::ConcurrentHashMap memLocks_; - folly::ConcurrentHashMap cachedTerms_; - std::unique_ptr resumeThread_; - /** - * edges need to recover will put into this, - * resume processor will get edge from this then do resume. - * */ - folly::ConcurrentHashMap dangleEdges_; + folly::ConcurrentHashMap cachedTerms_; + + using MemLockKey = std::tuple; + folly::ConcurrentHashMap memLocks_; /** * @brief every raft part need to do a scan, * only scanned part allowed to insert edges */ - folly::ConcurrentHashMap, int> scannedParts_; + folly::ConcurrentHashMap, TermID> currTerm_; }; } // namespace storage