Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions be/src/agent/task_worker_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@
#include "util/doris_metrics.h"
#include "util/file_utils.h"
#include "util/monotime.h"
#include "util/scoped_cleanup.h"
#include "util/stopwatch.hpp"
#include "util/threadpool.h"
#include "util/trace.h"

using std::deque;
using std::list;
Expand Down Expand Up @@ -257,6 +259,7 @@ void TaskWorkerPool::_remove_task_info(const TTaskType::type task_type, int64_t
EnumToString(TTaskType, task_type, type_str);
LOG(INFO) << "remove task info. type=" << type_str << ", signature=" << signature
<< ", queue_size=" << queue_size;
TRACE("remove task info");
}

void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request) {
Expand All @@ -278,6 +281,7 @@ void TaskWorkerPool::_finish_task(const TFinishTaskRequest& finish_task_request)
}
sleep(config::sleep_one_second);
}
TRACE("finish task");
}

uint32_t TaskWorkerPool::_get_next_task_index(int32_t thread_count,
Expand Down Expand Up @@ -324,6 +328,17 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() {
_tasks.pop_front();
}

scoped_refptr<Trace> trace(new Trace);
MonotonicStopWatch watch;
watch.start();
SCOPED_CLEANUP({
if (watch.elapsed_time() / 1e9 > config::agent_task_trace_threshold_sec) {
LOG(WARNING) << "Trace:" << std::endl << trace->DumpToString(Trace::INCLUDE_ALL);
}
});
ADOPT_TRACE(trace.get());
TRACE("start to create tablet $0", create_tablet_req.tablet_id);

TStatusCode::type status_code = TStatusCode::OK;
std::vector<string> error_msgs;
TStatus task_status;
Expand Down Expand Up @@ -351,6 +366,7 @@ void TaskWorkerPool::_create_tablet_worker_thread_callback() {
tablet_info.__set_path_hash(tablet->data_dir()->path_hash());
finish_tablet_infos.push_back(tablet_info);
}
TRACE("StorageEngine create tablet finish, status: $0", create_status);

task_status.__set_status_code(status_code);
task_status.__set_error_msgs(error_msgs);
Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ CONF_mInt64(row_step_for_compaction_merge_log, "0");
CONF_mInt32(base_compaction_trace_threshold, "10");
CONF_mInt32(cumulative_compaction_trace_threshold, "2");

// Threshold to logging agent task trace, in seconds.
CONF_mInt32(agent_task_trace_threshold_sec, "2");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please update hte be_config.md document?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, this PR has been merged before I update docs, I'll do it in next PR.


// time interval to record tablet scan count in second for the purpose of calculating tablet scan frequency
CONF_mInt64(tablet_scan_frequency_time_node_interval_second, "300");
// coefficient for tablet scan frequency and compaction score when finding a tablet for compaction
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/storage_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,7 @@ OLAPStatus StorageEngine::create_tablet(const TCreateTabletReq& request) {
LOG(WARNING) << "there is no available disk that can be used to create tablet.";
return OLAP_ERR_CE_CMD_PARAMS_ERROR;
}
TRACE("got data directory for create tablet");
return _tablet_manager->create_tablet(request, stores);
}

Expand Down
9 changes: 9 additions & 0 deletions be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "util/pretty_printer.h"
#include "util/scoped_cleanup.h"
#include "util/time.h"
#include "util/trace.h"

using std::list;
using std::map;
Expand Down Expand Up @@ -211,6 +212,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request,
<< ", schema_hash=" << schema_hash;

WriteLock wlock(_get_tablets_shard_lock(tablet_id));
TRACE("got tablets shard lock");
// Make create_tablet operation to be idempotent:
// 1. Return true if tablet with same tablet_id and schema_hash exist;
// false if tablet with same tablet_id but different schema_hash exist.
Expand Down Expand Up @@ -251,6 +253,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request,
stores.clear();
stores.push_back(base_tablet->data_dir());
}
TRACE("got base tablet");

// set alter type to schema-change. it is useless
TabletSharedPtr tablet = _internal_create_tablet_unlocked(
Expand All @@ -260,6 +263,7 @@ OLAPStatus TabletManager::create_tablet(const TCreateTabletReq& request,
DorisMetrics::instance()->create_tablet_requests_failed->increment(1);
return OLAP_ERR_CE_CMD_PARAMS_ERROR;
}
TRACE("succeed to create tablet");

LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id
<< ", schema_hash=" << schema_hash;
Expand All @@ -282,6 +286,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
if (tablet == nullptr) {
return nullptr;
}
TRACE("create tablet meta");

int64_t new_tablet_id = request.tablet_id;
int32_t new_schema_hash = request.tablet_schema.schema_hash;
Expand Down Expand Up @@ -314,6 +319,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
LOG(WARNING) << "fail to create initial version for tablet. res=" << res;
break;
}
TRACE("create initial rowset");
}
if (is_schema_change) {
if (request.__isset.base_tablet_id && request.base_tablet_id > 0) {
Expand Down Expand Up @@ -341,6 +347,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
int64_t new_creation_time = base_tablet->creation_time() + 1;
tablet->set_creation_time(new_creation_time);
}
TRACE("update schema change info");
}
// Add tablet to StorageEngine will make it visible to user
res = _add_tablet_unlocked(new_tablet_id, new_schema_hash, tablet, true, false);
Expand All @@ -358,6 +365,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
LOG(WARNING) << "fail to get tablet. res=" << res;
break;
}
TRACE("add tablet to StorageEngine");
} while (0);

if (res == OLAP_SUCCESS) {
Expand All @@ -373,6 +381,7 @@ TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
tablet->delete_all_files();
TabletMetaManager::remove(data_dir, new_tablet_id, new_schema_hash);
}
TRACE("revert changes on error");
return nullptr;
}

Expand Down