Skip to content

Commit

Permalink
Cleanup JVMTI sampling implementation (#174)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbachorik authored Jan 28, 2025
1 parent c03c1c8 commit bf74bf8
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 147 deletions.
6 changes: 6 additions & 0 deletions ddprof-lib/src/main/cpp/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@

#include <stddef.h>

// LP64_ONLY(code) expands to `code` when the compiler defines _LP64 and to
// nothing otherwise. Wrap declarations or statements in it when they must
// exist only in builds where _LP64 is defined.
#ifdef _LP64
# define LP64_ONLY(code) code
#else // !_LP64
# define LP64_ONLY(code)
#endif // _LP64

typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
Expand Down
5 changes: 3 additions & 2 deletions ddprof-lib/src/main/cpp/callTraceStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "os.h"
#include <string.h>

// Expands to a literal comma. Needed to pass a comma inside a single macro
// argument (e.g. LP64_ONLY(0 COMMA)), where a bare ',' would be parsed by the
// preprocessor as an argument separator.
#define COMMA ,

static const u32 INITIAL_CAPACITY = 65536;
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
Expand Down Expand Up @@ -81,8 +83,7 @@ class LongHashTable {
}
};

CallTrace CallTraceStorage::_overflow_trace = {
false, 1, {BCI_ERROR, (jmethodID) "storage_overflow"}};
CallTrace CallTraceStorage::_overflow_trace = {false, 1, {BCI_ERROR, LP64_ONLY(0 COMMA) (jmethodID)"storage_overflow"}};

CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK), _lock(0) {
_current_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);
Expand Down
28 changes: 5 additions & 23 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,14 @@ void LivenessTracker::cleanup_table(bool forced) {
if (target != i) {
_table[target] = _table[i]; // will clone TrackingEntry at 'i'
_table[i].ref = nullptr; // will nullify the original ref
assert(_table[i].frames == _table[target].frames);
_table[i].frames = nullptr; // will nullify the original frames
assert(_table[target].frames != nullptr);
_table[i].call_trace_id = 0;
}
assert(_table[target].ref != nullptr &&
_table[target].frames != nullptr);
_table[target].age += epoch_diff;
} else {
jweak tmpRef = _table[i].ref;
_table[i].ref = nullptr;
env->DeleteWeakGlobalRef(tmpRef);

jvmtiFrameInfo *tmpFrames = _table[i].frames;
_table[i].frames = nullptr;
assert(_table[i].ref == nullptr && _table[i].frames == nullptr);
delete[] tmpFrames;
_table[i].call_trace_id = 0;
}
}

Expand Down Expand Up @@ -119,8 +111,6 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
for (int i = 0; i < (sz = _table_size); i++) {
jobject ref = env->NewLocalRef(_table[i].ref);
if (ref != nullptr) {
assert(_table[i].frames != nullptr);

if (tracked_thread_ids != nullptr) {
tracked_thread_ids->insert(_table[i].tid);
}
Expand All @@ -141,9 +131,7 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
: 0;
env->ReleaseStringUTFChars(name_str, name);

Profiler::instance()->recordExternalSample(
1, _table[i].tid, _table[i].frames, _table[i].frames_size,
/*truncated=*/false, BCI_LIVENESS, &event);
Profiler::instance()->recordDeferredSample(_table[i].tid, _table[i].call_trace_id, BCI_LIVENESS, &event);
}

env->DeleteLocalRef(ref);
Expand Down Expand Up @@ -292,8 +280,7 @@ Error LivenessTracker::initialize(Arguments &args) {
}

void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
jobject object, int num_frames,
jvmtiFrameInfo *frames) {
jobject object, u32 call_trace_id) {
if (!_enabled) {
// disabled
return;
Expand Down Expand Up @@ -340,12 +327,7 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
_table[idx].alloc = event;
_table[idx].skipped = skipped;
_table[idx].age = 0;
_table[idx].frames_size = num_frames;
_table[idx].frames = new jvmtiFrameInfo[_table[idx].frames_size];
if (frames != nullptr) {
memcpy(_table[idx].frames, frames,
sizeof(jvmtiFrameInfo) * _table[idx].frames_size);
}
_table[idx].call_trace_id = call_trace_id;
_table[idx].ctx = Contexts::get(tid);
}

Expand Down
6 changes: 2 additions & 4 deletions ddprof-lib/src/main/cpp/livenessTracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ typedef struct TrackingEntry {
jweak ref;
AllocEvent alloc;
double skipped;
jint frames_size;
jvmtiFrameInfo *frames;
u32 call_trace_id;
jint tid;
jlong time;
jlong age;
Expand Down Expand Up @@ -100,8 +99,7 @@ class LivenessTracker {

Error start(Arguments &args);
void stop();
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object,
int num_frames, jvmtiFrameInfo *frames);
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object, u32 call_trace_id);
void flush(std::set<int> &tracked_thread_ids);

static void JNICALL GarbageCollectionFinish(jvmtiEnv *jvmti_env);
Expand Down
34 changes: 5 additions & 29 deletions ddprof-lib/src/main/cpp/objectSampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
event._id = id;
}

jint frames_size = 0;
jvmtiFrameInfo *frames = nullptr;

u32 call_trace_id = 0;
// we record the details and stack traces only when recording
// allocations or liveness
if (_record_allocations || _record_liveness) {
Expand All @@ -78,31 +76,14 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
? 1
: 1 / (1 - exp(-size / (double)_interval)));

frames = new jvmtiFrameInfo[_max_stack_depth];
call_trace_id = Profiler::instance()->recordJVMTISample(size, tid, thread, BCI_ALLOC, &event, !_record_allocations);

if (jvmti->GetStackTrace(thread, 0, _max_stack_depth, frames,
&frames_size) != JVMTI_ERROR_NONE ||
frames_size <= 0) {
delete[] frames;
if (call_trace_id == 0) {
return;
}

if (frames_size > 0) {
std::set<jclass> classes;
jclass method_class;
for (int i = 0; i < frames_size; i++) {
if (jvmti->GetMethodDeclaringClass(frames[i].method, &method_class) ==
0) {
classes.insert(method_class);
}
}
}
}

if (_record_allocations) {
Profiler::instance()->recordExternalSample(
size, tid, frames, frames_size, /*truncated=*/false, BCI_ALLOC, &event);

u64 current_samples = __sync_add_and_fetch(&_alloc_event_count, 1);
// in order to lower the number of atomic reads from the timestamp variable
// the check will be performed only each N samples
Expand Down Expand Up @@ -130,15 +111,10 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
}

// Either we are recording liveness or tracking GC generations (lightweight
// livenss samples)
// liveness samples)
if (_gc_generations || _record_liveness) {
LivenessTracker::instance()->track(jni, event, tid, object, frames_size,
frames);
LivenessTracker::instance()->track(jni, event, tid, object, call_trace_id);
}

// it's safe to delete frames - the liveness tracker keeps a full copy of the
// frames and manages its own memory
delete[] frames;
}

Error ObjectSampler::check(Arguments &args) {
Expand Down
106 changes: 39 additions & 67 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "profiler.h"
#include "asyncSampleMutex.h"
#include "common.h"
#include "context.h"
#include "counters.h"
#include "ctimer.h"
Expand Down Expand Up @@ -548,51 +549,6 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames,
return trace.frames - frames + 1;
}

int Profiler::getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int start_depth,
int max_depth) {
int num_frames;
if (VM::jvmti()->GetStackTrace(NULL, start_depth, _max_stack_depth,
jvmti_frames, &num_frames) == 0 &&
num_frames > 0) {
return convertFrames(jvmti_frames, frames, num_frames);
}
return 0;
}

int Profiler::getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int max_depth) {
// We cannot call pure JVM TI here, because it assumes _thread_in_native
// state, but allocation events happen in _thread_in_vm state, see
// https://github.com/jvm-profiling-tools/java-profiler/issues/64
JNIEnv *jni = VM::jni();
if (jni == NULL) {
return 0;
}

JitWriteProtection jit(false);
VMThread *vm_thread = VMThread::fromEnv(jni);
int num_frames;
if (VMStructs::_get_stack_trace(NULL, vm_thread, 0, max_depth, jvmti_frames,
&num_frames) == 0 &&
num_frames > 0) {
return convertFrames(jvmti_frames, frames, num_frames);
}
return 0;
}

inline int Profiler::convertFrames(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int num_frames) {
// Convert to AsyncGetCallTrace format.
// Note: jvmti_frames and frames may overlap.
for (int i = 0; i < num_frames; i++) {
jint bci = jvmti_frames[i].location;
frames[i].method_id = jvmti_frames[i].method;
frames[i].bci = bci;
}
return num_frames;
}

void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
NMethod *nmethod) {
if (nmethod->isNMethod() && nmethod->isAlive()) {
Expand Down Expand Up @@ -634,10 +590,7 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
}
}

void Profiler::recordExternalSample(u64 counter, int tid,
jvmtiFrameInfo *jvmti_frames,
jint num_jvmti_frames, bool truncated,
jint event_type, Event *event) {
u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) {
atomicInc(_total_samples);

u32 lock_index = getLockIndex(tid);
Expand All @@ -647,29 +600,50 @@ void Profiler::recordExternalSample(u64 counter, int tid,
// Too many concurrent signals already
atomicInc(_failures[-ticks_skipped]);

if (event_type == BCI_CPU && _cpu_engine == &perf_events) {
// Need to reset PerfEvents ring buffer, even though we discard the
// collected trace
PerfEvents::resetBuffer(tid);
}
return;
return 0;
}
u32 call_trace_id = 0;
if (!_omit_stacktraces && jvmti_frames != nullptr) {
if (!_omit_stacktraces) {
ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames;
jvmtiFrameInfo *jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;

int num_frames = 0;
if (!_jfr.active() && BCI_ALLOC >= event_type && event_type >= BCI_PARK &&
event->_id) {
num_frames = makeFrame(frames, event_type, event->_id);

if (VM::jvmti()->GetStackTrace(thread, 0, _max_stack_depth, jvmti_frames, &num_frames) == JVMTI_ERROR_NONE && num_frames > 0) {
// Convert to AsyncGetCallTrace format.
// Note: jvmti_frames and frames may overlap.
for (int i = 0; i < num_frames; i++) {
jint bci = jvmti_frames[i].location;
jmethodID mid = jvmti_frames[i].method;
frames[i].method_id = mid;
frames[i].bci = bci;
// see https://github.com/async-profiler/async-profiler/pull/1090
LP64_ONLY(frames[i].padding = 0;)
}
}

num_frames +=
convertFrames(jvmti_frames, frames + num_frames, num_jvmti_frames);
call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter);
}
if (!deferred) {
_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);
}

_locks[lock_index].unlock();
return call_trace_id;
}

call_trace_id =
_call_trace_storage.put(num_frames, frames, truncated, counter);
void Profiler::recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event) {
atomicInc(_total_samples);

u32 lock_index = getLockIndex(tid);
if (!_locks[lock_index].tryLock() &&
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
// Too many concurrent signals already
atomicInc(_failures[-ticks_skipped]);
return;
}

_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);

_locks[lock_index].unlock();
Expand Down Expand Up @@ -1153,13 +1127,11 @@ Error Profiler::start(Arguments &args, bool reset) {
// (Re-)allocate calltrace buffers
if (_max_stack_depth != args._jstackdepth) {
_max_stack_depth = args._jstackdepth;
size_t buffer_size =
(_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) *
sizeof(CallTraceBuffer);
size_t nelem = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES;

for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
free(_calltrace_buffer[i]);
_calltrace_buffer[i] = (CallTraceBuffer *)malloc(buffer_size);
_calltrace_buffer[i] = (CallTraceBuffer*)calloc(nelem, sizeof(CallTraceBuffer));
if (_calltrace_buffer[i] == NULL) {
_max_stack_depth = 0;
return Error("Not enough memory to allocate stack trace buffers (try "
Expand Down
14 changes: 4 additions & 10 deletions ddprof-lib/src/main/cpp/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ const int RESERVED_FRAMES = 4;

enum EventMask { EM_CPU = 1 << 0, EM_WALL = 1 << 1, EM_ALLOC = 1 << 2 };

struct CallTraceBuffer {
union CallTraceBuffer {
ASGCT_CallFrame _asgct_frames[1];
jvmtiFrameInfo _jvmti_frames[1];
};

class FrameName;
Expand Down Expand Up @@ -138,12 +139,6 @@ class Profiler {
int tid, StackContext *java_ctx, bool *truncated);
int getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, int max_depth,
StackContext *java_ctx, bool *truncated);
int getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
int start_depth, int max_depth);
int getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
ASGCT_CallFrame *frames, int max_depth);
int convertFrames(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
int num_frames);
void fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
NMethod *nmethod);
void updateThreadName(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread,
Expand Down Expand Up @@ -223,9 +218,8 @@ class Profiler {
ASGCT_CallFrame *frames);
void recordSample(void *ucontext, u64 weight, int tid, jint event_type,
u32 call_trace_id, Event *event);
void recordExternalSample(u64 weight, int tid, jvmtiFrameInfo *jvmti_frames,
jint num_jvmti_frames, bool truncated,
jint event_type, Event *event);
u32 recordJVMTISample(u64 weight, int tid, jthread thread, jint event_type, Event *event, bool deferred);
void recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event);
void recordExternalSample(u64 weight, int tid, int num_frames,
ASGCT_CallFrame *frames, bool truncated,
jint event_type, Event *event);
Expand Down
4 changes: 4 additions & 0 deletions ddprof-lib/src/main/cpp/vmEntry.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <jvmti.h>

#include "arch.h"
#include "codeCache.h"
#include "frame.h"

Expand Down Expand Up @@ -62,9 +63,12 @@ enum ASGCT_Failure {

// A single stack frame in AsyncGetCallTrace (ASGCT) layout.
typedef struct {
// bci value of the frame
jint bci;
// Explicit padding member, declared only when _LP64 is defined.
// NOTE(review): presumably names the gap between the jint and the pointer
// that follows so it can be initialized explicitly — confirm against the PR.
// see https://github.com/async-profiler/async-profiler/pull/1090
LP64_ONLY(jint padding;)
// JVM method identifier of the frame
jmethodID method_id;
} ASGCT_CallFrame;


typedef struct {
JNIEnv *env;
jint num_frames;
Expand Down
Loading

0 comments on commit bf74bf8

Please sign in to comment.