Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use jemalloc to trace thread allocation #8844

Merged
merged 42 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5987e73
z
CalvinNeo Mar 13, 2024
79d847c
instant
CalvinNeo Mar 14, 2024
279e7a9
fix
CalvinNeo Mar 14, 2024
51aa25b
fix
CalvinNeo Mar 14, 2024
d8ea43b
zzzzz
CalvinNeo Mar 14, 2024
e0705b1
fmt
CalvinNeo Mar 14, 2024
c73606a
h
CalvinNeo Mar 14, 2024
aede44c
f
CalvinNeo Mar 14, 2024
eb416dc
Merge branch 'master' into jemalloc-alloc
CalvinNeo Mar 14, 2024
33c06a9
co
CalvinNeo Mar 15, 2024
c433699
Update dbms/src/Storages/KVStore/FFI/ProxyFFI.cpp
CalvinNeo Mar 15, 2024
95cb885
Merge branch 'jemalloc-alloc' of github.com:CalvinNeo/tics into jemal…
CalvinNeo Mar 15, 2024
c2a78f7
fix
CalvinNeo Mar 15, 2024
dc8159d
t1
CalvinNeo Mar 15, 2024
8232c97
t1
CalvinNeo Mar 15, 2024
b662e97
reset
CalvinNeo Mar 15, 2024
c3fb1a4
fix
CalvinNeo Mar 15, 2024
f8f5039
z
CalvinNeo Mar 15, 2024
22f54b2
update proxy
CalvinNeo Mar 15, 2024
54635f6
Update dbms/src/Server/Server.cpp
CalvinNeo Mar 15, 2024
1f5b6b1
add tests
CalvinNeo Mar 15, 2024
ca7e114
Merge branch 'jemalloc-alloc' of github.com:CalvinNeo/tics into jemal…
CalvinNeo Mar 15, 2024
c99a306
fix
CalvinNeo Mar 15, 2024
41c865d
Update dbms/src/Storages/KVStore/FFI/ProxyFFI.cpp
CalvinNeo Mar 15, 2024
f9c4ed3
f
CalvinNeo Mar 15, 2024
1050cdf
f
CalvinNeo Mar 15, 2024
0f39d24
f
CalvinNeo Mar 15, 2024
9a5a426
x
CalvinNeo Mar 18, 2024
d9d141d
x
CalvinNeo Mar 18, 2024
b15004a
x
CalvinNeo Mar 18, 2024
a97525b
Update dbms/src/Common/TiFlashMetrics.cpp
CalvinNeo Mar 18, 2024
b45c47a
x
CalvinNeo Mar 18, 2024
0d82b86
Merge branch 'jemalloc-alloc' of github.com:CalvinNeo/tics into jemal…
CalvinNeo Mar 18, 2024
5fa20a1
df
CalvinNeo Mar 18, 2024
b384af1
df2
CalvinNeo Mar 18, 2024
0759459
fix
CalvinNeo Mar 18, 2024
25170e0
proxy
CalvinNeo Mar 18, 2024
5d10880
Merge remote-tracking branch 'upstream/master' into jemalloc-alloc
CalvinNeo Mar 18, 2024
d3356ac
fix2
CalvinNeo Mar 18, 2024
2eead98
fix2
CalvinNeo Mar 18, 2024
b41346b
ft
CalvinNeo Mar 18, 2024
a9aaafa
ft
CalvinNeo Mar 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions contrib/tiflash-proxy-cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,18 @@

if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR SAN_DEBUG)
set(_TIFLASH_PROXY_BUILD_PROFILE "debug")
set(_TIFLASH_PROXY_MAKE_COMMAND make debug)
if (ENABLE_JEMALLOC)
set(_TIFLASH_PROXY_MAKE_COMMAND ENABLE_FEATURES="external-jemalloc" make debug)
else()
set(_TIFLASH_PROXY_MAKE_COMMAND make debug)
endif()
else()
set(_TIFLASH_PROXY_BUILD_PROFILE "release")
set(_TIFLASH_PROXY_MAKE_COMMAND make release)
if (ENABLE_JEMALLOC)
set(_TIFLASH_PROXY_MAKE_COMMAND ENABLE_FEATURES="external-jemalloc" make release)
else()
set(_TIFLASH_PROXY_MAKE_COMMAND make release)
endif()
endif()

set(_TIFLASH_PROXY_SOURCE_DIR "${TiFlash_SOURCE_DIR}/contrib/tiflash-proxy")
Expand Down
37 changes: 37 additions & 0 deletions dbms/src/Common/MemoryTrace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 PingCAP, Inc.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This filename is easy to confuse with MemoryTracker.cpp.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed into MemoryAllocTracker

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/MemoryTrace.h>

#ifdef USE_JEMALLOC
#include <jemalloc/jemalloc.h>
#endif

namespace DB
{
std::tuple<uint64_t *, uint64_t *> getAllocDeallocPtr()
{
#ifdef USE_JEMALLOC
uint64_t * ptr1 = nullptr;
uint64_t size1 = sizeof ptr1;
je_mallctl("thread.allocatedp", (void *)&ptr1, &size1, nullptr, 0);
uint64_t * ptr2 = nullptr;
uint64_t size2 = sizeof ptr2;
je_mallctl("thread.deallocatedp", (void *)&ptr2, &size2, nullptr, 0);
return std::make_tuple(ptr1, ptr2);
#else
return std::make_tuple(nullptr, nullptr);
#endif
}
} // namespace DB
22 changes: 22 additions & 0 deletions dbms/src/Common/MemoryTrace.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <tuple>

namespace DB
{
std::tuple<uint64_t *, uint64_t *> getAllocDeallocPtr();
} // namespace DB
35 changes: 34 additions & 1 deletion dbms/src/Common/TiFlashMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ TiFlashMetrics::TiFlashMetrics()
.Name("tiflash_storage_sync_replica_ru")
.Help("RU for synchronous replica of keyspace")
.Register(*registry);
registered_raft_proxy_thread_memory_usage_family
= &prometheus::BuildGauge().Name(raft_proxy_thread_memory_usage).Help("").Register(*registry);
}

void TiFlashMetrics::addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru)
Expand Down Expand Up @@ -96,4 +98,35 @@ void TiFlashMetrics::removeReplicaSyncRUCounter(UInt32 keyspace_id)
registered_keyspace_sync_replica_ru_family->Remove(itr->second);
registered_keyspace_sync_replica_ru.erase(itr);
}
} // namespace DB

double TiFlashMetrics::getProxyThreadMemory(const std::string & k)
{
std::shared_lock lock(proxy_thread_report_mtx);
auto it = registered_raft_proxy_thread_memory_usage_metrics.find(k);
RUNTIME_CHECK(it != registered_raft_proxy_thread_memory_usage_metrics.end(), k);
return it->second->Value();
}

void TiFlashMetrics::setProxyThreadMemory(const std::string & k, Int64 v)
{
std::shared_lock lock(proxy_thread_report_mtx);
if unlikely (!registered_raft_proxy_thread_memory_usage_metrics.count(k))
{
// New metrics added through `Reset`.
return;
}
registered_raft_proxy_thread_memory_usage_metrics[k]->Set(v);
}

void TiFlashMetrics::registerProxyThreadMemory(const std::string & k)
{
std::unique_lock lock(proxy_thread_report_mtx);
if unlikely (!registered_raft_proxy_thread_memory_usage_metrics.count(k))
{
registered_raft_proxy_thread_memory_usage_metrics.emplace(
k,
&registered_raft_proxy_thread_memory_usage_family->Add({{"type", k}}));
}
}

} // namespace DB
9 changes: 9 additions & 0 deletions dbms/src/Common/TiFlashMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,9 @@ class TiFlashMetrics

void addReplicaSyncRU(UInt32 keyspace_id, UInt64 ru);
UInt64 debugQueryReplicaSyncRU(UInt32 keyspace_id);
void setProxyThreadMemory(const std::string & k, Int64 v);
double getProxyThreadMemory(const std::string & k);
void registerProxyThreadMemory(const std::string & k);

private:
TiFlashMetrics();
Expand All @@ -1098,6 +1101,7 @@ class TiFlashMetrics
static constexpr auto profile_events_prefix = "tiflash_system_profile_event_";
static constexpr auto current_metrics_prefix = "tiflash_system_current_metric_";
static constexpr auto async_metrics_prefix = "tiflash_system_asynchronous_metric_";
static constexpr auto raft_proxy_thread_memory_usage = "tiflash_raft_proxy_thread_memory_usage";

std::shared_ptr<prometheus::Registry> registry = std::make_shared<prometheus::Registry>();
// Here we add a ProcessCollector to collect cpu/rss/vsize/start_time information.
Expand All @@ -1118,6 +1122,11 @@ class TiFlashMetrics
std::mutex replica_sync_ru_mtx;
std::unordered_map<KeyspaceID, prometheus::Counter *> registered_keyspace_sync_replica_ru;

// TODO: Use CAS+HazPtr to remove proxy_thread_report_mtx, or hash some slots here.
prometheus::Family<prometheus::Gauge> * registered_raft_proxy_thread_memory_usage_family;
std::shared_mutex proxy_thread_report_mtx;
std::unordered_map<std::string, prometheus::Gauge *> registered_raft_proxy_thread_memory_usage_metrics;

public:
#define MAKE_METRIC_MEMBER_M(family_name, help, type, ...) \
MetricFamily<prometheus::type> family_name \
Expand Down
8 changes: 8 additions & 0 deletions dbms/src/Server/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,12 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "UniPS is not enabled for proxy, page_version={}", STORAGE_FORMAT_CURRENT.page);
}

#ifdef USE_JEMALLOC
LOG_INFO(log, "Using Jemalloc for TiFlash");
#else
LOG_INFO(log, "Not using Jemalloc for TiFlash");
#endif

RaftStoreProxyRunner proxy_runner(RaftStoreProxyRunner::RunRaftStoreProxyParms{&helper, proxy_conf}, log);

if (proxy_conf.is_proxy_runnable)
Expand Down Expand Up @@ -1708,6 +1714,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_ERROR(log, "Current status of engine-store is NOT Running, should not happen");
exit(-1);
}
LOG_INFO(log, "Stop collecting thread alloc metrics");
tmt_context.getKVStore()->stopThreadAllocInfo();
LOG_INFO(log, "Set store context status Stopping");
tmt_context.setStatusStopping();
{
Expand Down
40 changes: 40 additions & 0 deletions dbms/src/Storages/KVStore/FFI/ProxyFFI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1030,4 +1030,44 @@ BaseBuffView GetLockByKey(const EngineStoreServerWrap * server, uint64_t region_
}
}

void ReportThreadAllocateInfo(
EngineStoreServerWrap * server,
uint64_t tid,
BaseBuffView name,
ReportThreadAllocateInfoType type,
uint64_t value)
{
try
{
UNUSED(tid);
if (!server || !server->tmt || !server->tmt->getKVStore())
return;
server->tmt->getKVStore()->reportThreadAllocInfo(buffToStrView(name), type, value);
}
catch (...)
{
tryLogCurrentFatalException(__PRETTY_FUNCTION__);
exit(-1);
}
}

void ReportThreadAllocateBatch(
EngineStoreServerWrap * server,
uint64_t tid,
BaseBuffView name,
ReportThreadAllocateInfoBatch data)
{
try
{
UNUSED(server);
UNUSED(tid);
KVStore::reportThreadAllocBatch(buffToStrView(name), data);
}
catch (...)
{
tryLogCurrentFatalException(__PRETTY_FUNCTION__);
exit(-1);
}
}

} // namespace DB
14 changes: 13 additions & 1 deletion dbms/src/Storages/KVStore/FFI/ProxyFFI.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,17 @@ FapSnapshotState QueryFapSnapshotState(
uint64_t term);
void ClearFapSnapshot(EngineStoreServerWrap * server, uint64_t region_id);
bool KvstoreRegionExists(EngineStoreServerWrap * server, uint64_t region_id);
void ReportThreadAllocateInfo(
EngineStoreServerWrap *,
uint64_t tid,
BaseBuffView name,
ReportThreadAllocateInfoType type,
uint64_t value);
void ReportThreadAllocateBatch(
EngineStoreServerWrap *,
uint64_t tid,
BaseBuffView name,
ReportThreadAllocateInfoBatch data);
}

inline EngineStoreServerHelper GetEngineStoreServerHelper(EngineStoreServerWrap * tiflash_instance_wrap)
Expand Down Expand Up @@ -257,7 +268,8 @@ inline EngineStoreServerHelper GetEngineStoreServerHelper(EngineStoreServerWrap
.fn_fast_add_peer = FastAddPeer,
.fn_query_fap_snapshot_state = QueryFapSnapshotState,
.fn_clear_fap_snapshot = ClearFapSnapshot,
.fn_kvstore_region_exists = KvstoreRegionExists,
.fn_report_thread_allocate_info = ReportThreadAllocateInfo,
.fn_report_thread_allocate_batch = ReportThreadAllocateBatch,
};
}

Expand Down
Loading