From 8db6ea3a018600da17efefe4aecec65a8af7f702 Mon Sep 17 00:00:00 2001 From: hui lai Date: Thu, 5 Dec 2024 15:42:26 +0800 Subject: [PATCH] [fix](routine load) replace heavy work pool with routine load thread pool for metadata fetching (#44907) In production, we encountered an issue where the librdkafka consumer got stuck during destruction, causing the heavy work pool to become saturated, which in turn made all heavy work pool-dependent functionalities, such as querying, unusable. To mitigate this impact, we replaced the heavy work pool with routine load threads for metadata fetching. --- be/src/runtime/routine_load/routine_load_task_executor.h | 2 ++ be/src/service/internal_service.cpp | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/be/src/runtime/routine_load/routine_load_task_executor.h b/be/src/runtime/routine_load/routine_load_task_executor.h index 0e597d796c9f77..b1196f7824afac 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.h +++ b/be/src/runtime/routine_load/routine_load_task_executor.h @@ -73,6 +73,8 @@ class RoutineLoadTaskExecutor { std::vector* partition_offsets, int timeout); + ThreadPool& get_thread_pool() { return *_thread_pool; } + private: // execute the task void exec_task(std::shared_ptr ctx, DataConsumerPool* pool, diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index be99278ab541a3..463beff6d2ffea 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -1238,7 +1238,10 @@ void PInternalService::report_stream_load_status(google::protobuf::RpcController void PInternalService::get_info(google::protobuf::RpcController* controller, const PProxyRequest* request, PProxyResult* response, google::protobuf::Closure* done) { - bool ret = _heavy_work_pool.try_offer([this, request, response, done]() { + bool ret = _exec_env->routine_load_task_executor()->get_thread_pool().submit_func([this, + request, + response, + done]() { 
brpc::ClosureGuard closure_guard(done); // PProxyRequest is defined in gensrc/proto/internal_service.proto // Currently it supports 2 kinds of requests: