diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index a4087483a536f2..0d10314911fbbb 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -51,8 +51,10 @@ #include "util/doris_metrics.h" #include "util/file_utils.h" #include "util/monotime.h" +#include "util/scoped_cleanup.h" #include "util/stopwatch.hpp" #include "util/threadpool.h" +#include "util/trace.h" using std::deque; using std::list; @@ -257,6 +259,7 @@ void TaskWorkerPool::_remove_task_info(const TTaskType::type task_type, int64_t EnumToString(TTaskType, task_type, type_str); LOG(INFO) << "remove task info. type=" << type_str << ", signature=" << signature << ", queue_size=" << queue_size; + TRACE("remove task info"); } void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request) { @@ -278,6 +281,7 @@ void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request) } sleep(config::sleep_one_second); } + TRACE("finish task"); } uint32_t TaskWorkerPool::_get_next_task_index(int32_t thread_count, @@ -324,6 +328,17 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { _tasks.pop_front(); } + scoped_refptr trace(new Trace); + MonotonicStopWatch watch; + watch.start(); + SCOPED_CLEANUP({ + if (watch.elapsed_time() / 1e9 > config::agent_task_trace_threshold_sec) { + LOG(WARNING) << "Trace:" << std::endl << trace->DumpToString(Trace::INCLUDE_ALL); + } + }); + ADOPT_TRACE(trace.get()); + TRACE("start to create tablet $0", create_tablet_req.tablet_id); + TStatusCode::type status_code = TStatusCode::OK; std::vector error_msgs; TStatus task_status; @@ -351,6 +366,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() { tablet_info.__set_path_hash(tablet->data_dir()->path_hash()); finish_tablet_infos.push_back(tablet_info); } + TRACE("StorageEngine create tablet finish, status: $0", create_status); task_status.__set_status_code(status_code); task_status.__set_error_msgs(error_msgs); diff --git a/be/src/common/config.h b/be/src/common/config.h index 75ed1eb79f18c2..93f6bfeacb11c8 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -331,6 +331,9 @@ CONF_mInt64(row_step_for_compaction_merge_log, "0"); CONF_mInt32(base_compaction_trace_threshold, "10"); CONF_mInt32(cumulative_compaction_trace_threshold, "2"); +// Threshold to logging agent task trace, in seconds. +CONF_mInt32(agent_task_trace_threshold_sec, "2"); + // time interval to record tablet scan count in second for the purpose of calculating tablet scan frequency CONF_mInt64(tablet_scan_frequency_time_node_interval_second, "300"); // coefficient for tablet scan frequency and compaction score when finding a tablet for compaction diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index cc1df07c3f904a..114cb101ccda5c 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -806,6 +806,7 @@ OLAPStatus StorageEngine::create_tablet(const TCreateTabletReq& request) { LOG(WARNING) << "there is no available disk that can be used to create tablet."; return OLAP_ERR_CE_CMD_PARAMS_ERROR; } + TRACE("got data directory for create tablet"); return _tablet_manager->create_tablet(request, stores); } diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 1fd47c68329a03..1567698a6bff9b 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -51,6 +51,7 @@ #include "util/pretty_printer.h" #include "util/scoped_cleanup.h" #include "util/time.h" +#include "util/trace.h" using std::list; using std::map; @@ -211,6 +212,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, << ", schema_hash=" << schema_hash; WriteLock wlock(_get_tablets_shard_lock(tablet_id)); + TRACE("got tablets shard lock"); // Make create_tablet operation to be idempotent: // 1. Return true if tablet with same tablet_id and schema_hash exist; // false if tablet with same tablet_id but different schema_hash exist. @@ -251,6 +253,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, stores.clear(); stores.push_back(base_tablet->data_dir()); } + TRACE("got base tablet"); // set alter type to schema-change. it is useless TabletSharedPtr tablet = _internal_create_tablet_unlocked( @@ -260,6 +263,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request, DorisMetrics::instance()->create_tablet_requests_failed->increment(1); return OLAP_ERR_CE_CMD_PARAMS_ERROR; } + TRACE("succeed to create tablet"); LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; @@ -282,6 +286,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( if (tablet == nullptr) { return nullptr; } + TRACE("create tablet meta"); int64_t new_tablet_id = request.tablet_id; int32_t new_schema_hash = request.tablet_schema.schema_hash; @@ -314,6 +319,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( LOG(WARNING) << "fail to create initial version for tablet. res=" << res; break; } + TRACE("create initial rowset"); } if (is_schema_change) { if (request.__isset.base_tablet_id && request.base_tablet_id > 0) { @@ -341,6 +347,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( int64_t new_creation_time = base_tablet->creation_time() + 1; tablet->set_creation_time(new_creation_time); } + TRACE("update schema change info"); } // Add tablet to StorageEngine will make it visible to user res = _add_tablet_unlocked(new_tablet_id, new_schema_hash, tablet, true, false); @@ -358,6 +365,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( LOG(WARNING) << "fail to get tablet. res=" << res; break; } + TRACE("add tablet to StorageEngine"); } while (0); if (res == OLAP_SUCCESS) { @@ -373,6 +381,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( tablet->delete_all_files(); TabletMetaManager::remove(data_dir, new_tablet_id, new_schema_hash); } + TRACE("revert changes on error"); return nullptr; }