Merge pull request #2950 from DataDog/ivoanjo/prof-7440-improve-threa…

…d-naming [PROF-7440] Use invoke location as a fallback for nameless threads in the profiler
DataDog · Jul 7, 2023 · 4890f92 · 4890f92
2 parents 8d303a9 + 87bd64e
commit 4890f92
Show file tree

Hide file tree

Showing 5 changed files with 108 additions and 9 deletions.
diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c
@@ -64,6 +64,7 @@
 // ---
 
 #define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
+#define THREAD_INVOKE_LOCATION_LIMIT_CHARS 512
 #define IS_WALL_TIME true
 #define IS_NOT_WALL_TIME false
 #define MISSING_TRACER_CONTEXT_KEY 0
@@ -117,6 +118,8 @@ struct thread_context_collector_state {
 struct per_thread_context {
   char thread_id[THREAD_ID_LIMIT_CHARS];
   ddog_CharSlice thread_id_char_slice;
+  char thread_invoke_location[THREAD_INVOKE_LOCATION_LIMIT_CHARS];
+  ddog_CharSlice thread_invoke_location_char_slice;
   thread_cpu_time_id thread_cpu_time_id;
   long cpu_time_at_previous_sample_ns;  // Can be INVALID_TIME until initialized or if getting it fails for another reason
   long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
@@ -660,6 +663,13 @@ static void trigger_sample_for_thread(
       .key = DDOG_CHARSLICE_C("thread name"),
       .str = main_thread_name
     };
+  } else {
+    // For other threads without name, we use the "invoke location" (first file:line of the block used to start the thread), if any.
+    // This is what Ruby shows in `Thread#to_s`.
+    labels[label_pos++] = (ddog_prof_Label) {
+      .key = DDOG_CHARSLICE_C("thread name"),
+      .str = thread_context->thread_invoke_location_char_slice // This is an empty string if no invoke location was available
+    };
   }
 
   struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
@@ -757,6 +767,25 @@ static void initialize_context(VALUE thread, struct per_thread_context *thread_c
   snprintf(thread_context->thread_id, THREAD_ID_LIMIT_CHARS, "%"PRIu64" (%lu)", native_thread_id_for(thread), (unsigned long) thread_id_for(thread));
   thread_context->thread_id_char_slice = (ddog_CharSlice) {.ptr = thread_context->thread_id, .len = strlen(thread_context->thread_id)};
 
+  int invoke_line_location;
+  VALUE invoke_file_location = invoke_location_for(thread, &invoke_line_location);
+  if (invoke_file_location != Qnil) {
+    snprintf(
+      thread_context->thread_invoke_location,
+      THREAD_INVOKE_LOCATION_LIMIT_CHARS,
+      "%s:%d",
+      StringValueCStr(invoke_file_location),
+      invoke_line_location
+    );
+  } else {
+    snprintf(thread_context->thread_invoke_location, THREAD_INVOKE_LOCATION_LIMIT_CHARS, "%s", "");
+  }
+
+  thread_context->thread_invoke_location_char_slice = (ddog_CharSlice) {
+    .ptr = thread_context->thread_invoke_location,
+    .len = strlen(thread_context->thread_invoke_location)
+  };
+
   thread_context->thread_cpu_time_id = thread_cpu_time_id_for(thread);
 
   // These will get initialized during actual sampling
@@ -810,6 +839,7 @@ static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value
 
   VALUE arguments[] = {
     ID2SYM(rb_intern("thread_id")),                       /* => */ rb_str_new2(thread_context->thread_id),
+    ID2SYM(rb_intern("thread_invoke_location")),          /* => */ rb_str_new2(thread_context->thread_invoke_location),
     ID2SYM(rb_intern("thread_cpu_time_id_valid?")),       /* => */ thread_context->thread_cpu_time_id.valid ? Qtrue : Qfalse,
     ID2SYM(rb_intern("thread_cpu_time_id")),              /* => */ CLOCKID2NUM(thread_context->thread_cpu_time_id.clock_id),
     ID2SYM(rb_intern("cpu_time_at_previous_sample_ns")),  /* => */ LONG2NUM(thread_context->cpu_time_at_previous_sample_ns),

diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb
@@ -158,6 +158,9 @@ def add_compiler_flag(flag)
 # On older Rubies, rb_global_vm_lock_struct did not include the owner field
 $defs << '-DNO_GVL_OWNER' if RUBY_VERSION < '2.6'
 
+# On older Rubies, there was no thread->invoke_arg
+$defs << '-DNO_THREAD_INVOKE_ARG' if RUBY_VERSION < '2.6'
+
 # On older Rubies, we need to use rb_thread_t instead of rb_execution_context_t
 $defs << '-DUSE_THREAD_INSTEAD_OF_EXECUTION_CONTEXT' if RUBY_VERSION < '2.5'
 

diff --git a/ext/ddtrace_profiling_native_extension/private_vm_api_access.c b/ext/ddtrace_profiling_native_extension/private_vm_api_access.c
@@ -772,3 +772,37 @@ check_method_entry(VALUE obj, int can_be_svar)
   // they're always on the main Ractor
   bool ddtrace_rb_ractor_main_p(void) { return true; }
 #endif // NO_RACTORS
+
+// This is a tweaked and inlined version of
+// threadptr_invoke_proc_location + rb_proc_location + iseq_location .
+//
+// It's useful to have here because not all of the methods above are accessible to extensions + to avoid the
+// array allocation that iseq_location did to contain its return value.
+static const rb_iseq_t *maybe_thread_invoke_proc_iseq(VALUE thread_value) {
+  rb_thread_t *thread = thread_struct_from_object(thread_value);
+
+  #ifndef NO_THREAD_INVOKE_ARG // Ruby 2.6+
+    if (thread->invoke_type != thread_invoke_type_proc) return NULL;
+
+    VALUE proc = thread->invoke_arg.proc.proc;
+  #else
+    if (thread->first_func || !thread->first_proc) return NULL;
+
+    VALUE proc = thread->first_proc;
+  #endif
+
+  const rb_iseq_t *iseq = rb_proc_get_iseq(proc, 0);
+  if (iseq == NULL) return NULL;
+
+  rb_iseq_check(iseq);
+  return iseq;
+}
+
+VALUE invoke_location_for(VALUE thread, int *line_location) {
+  const rb_iseq_t *iseq = maybe_thread_invoke_proc_iseq(thread);
+
+  if (iseq == NULL) return Qnil;
+
+  *line_location = NUM2INT(rb_iseq_first_lineno(iseq));
+  return rb_iseq_path(iseq);
+}
diff --git a/ext/ddtrace_profiling_native_extension/private_vm_api_access.h b/ext/ddtrace_profiling_native_extension/private_vm_api_access.h
@@ -43,3 +43,9 @@ bool ddtrace_rb_ractor_main_p(void);
 
 // See comment on `record_placeholder_stack_in_native_code` for a full explanation of what this means (and why we don't just return 0)
 #define PLACEHOLDER_STACK_IN_NATIVE_CODE -1
+
+// This method provides the file and line of the "invoke location" of a thread (first file:line of the block used to
+// start the thread), if any.
+// This is what Ruby shows in `Thread#to_s`.
+// The file is returned directly, and the line is recorded onto *line_location.
+VALUE invoke_location_for(VALUE thread, int *line_location);
diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb
@@ -116,20 +116,31 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
         .to include(*[Thread.main, t1, t2, t3].map(&:object_id))
     end
 
-    it 'includes the thread names, if available' do
+    it 'includes the thread names' do
       t1.name = 'thread t1'
-      t2.name = nil
-      t3.name = 'thread t3'
+      t2.name = 'thread t2'
 
       sample
 
       t1_sample = samples_for_thread(samples, t1).first
       t2_sample = samples_for_thread(samples, t2).first
-      t3_sample = samples_for_thread(samples, t3).first
 
       expect(t1_sample.labels).to include(:'thread name' => 'thread t1')
-      expect(t2_sample.labels.keys).to_not include(:'thread name')
-      expect(t3_sample.labels).to include(:'thread name' => 'thread t3')
+      expect(t2_sample.labels).to include(:'thread name' => 'thread t2')
+    end
+
+    context 'when no thread names are available' do
+      # NOTE: As of this writing, the dd-trace-rb spec_helper.rb includes a monkey patch to Thread creation that we use
+      # to track specs that leak threads. This means that the invoke_location of every thread will point at the
+      # spec_helper in our test suite. Just in case you're looking at the output and being a bit confused :)
+      it 'uses the thread_invoke_location as a thread name' do
+        t1.name = nil
+        sample
+        t1_sample = samples_for_thread(samples, t1).first
+
+        expect(t1_sample.labels).to include(:'thread name' => per_thread_context.fetch(t1).fetch(:thread_invoke_location))
+        expect(t1_sample.labels).to include(:'thread name' => match(/.+\.rb:\d+/))
+      end
     end
 
     it 'includes a fallback name for the main thread, when not set' do
@@ -918,13 +929,10 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
         Thread.current.name = 'thread_with_name'
         sample_allocation(weight: 123)
       end.join
-      thread_without_name = Thread.new { sample_allocation(weight: 123) }.join
 
       sample_with_name = samples_for_thread(samples, thread_with_name).first
-      sample_without_name = samples_for_thread(samples, thread_without_name).first
 
       expect(sample_with_name.labels).to include(:'thread name' => 'thread_with_name')
-      expect(sample_without_name.labels).to_not include(:'thread name')
     end
 
     describe 'code hotspots' do
@@ -1086,6 +1094,24 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current
           end
         end
       end
+
+      describe ':thread_invoke_location' do
+        it 'is empty for the main thread' do
+          expect(per_thread_context.fetch(Thread.main).fetch(:thread_invoke_location)).to be_empty
+        end
+
+        # NOTE: As of this writing, the dd-trace-rb spec_helper.rb includes a monkey patch to Thread creation that we use
+        # to track specs that leak threads. This means that the invoke_location of every thread will point at the
+        # spec_helper in our test suite. Just in case you're looking at the output and being a bit confused :)
+        it 'contains the file and line for the started threads' do
+          [t1, t2, t3].each do |thread|
+            invoke_location = per_thread_context.fetch(thread).fetch(:thread_invoke_location)
+
+            expect(thread.inspect).to include(invoke_location)
+            expect(invoke_location).to match(/.+\.rb:\d+/)
+          end
+        end
+      end
     end
 
     context 'after sampling multiple times' do