From f5a3279ca8d69f78a4ee57ed1fbe5d39566ae2f9 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 27 Oct 2022 13:15:58 +0100 Subject: [PATCH 1/6] Refactor `trigger_sample_for_thread` in preparation for adding more labels --- .../collectors_cpu_and_wall_time.c | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index 4797f80e4cd..4f9960c9e98 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -508,26 +508,35 @@ static void trigger_sample_for_thread( ddog_Slice_i64 metric_values_slice, sample_type type ) { - VALUE thread_name = thread_name_for(thread); - bool have_thread_name = thread_name != Qnil; - - int label_count = 1 + (have_thread_name ? 1 : 0); - ddog_Label labels[label_count]; + int max_label_count = + 1 + // thread id + 1; // thread name + ddog_Label labels[max_label_count]; + int label_pos = 0; + + labels[label_pos++] = (ddog_Label) { + .key = DDOG_CHARSLICE_C("thread id"), + .str = thread_context->thread_id_char_slice + }; - labels[0] = (ddog_Label) {.key = DDOG_CHARSLICE_C("thread id"), .str = thread_context->thread_id_char_slice}; - if (have_thread_name) { - labels[1] = (ddog_Label) { + VALUE thread_name = thread_name_for(thread); + if (thread_name != Qnil) { + labels[label_pos++] = (ddog_Label) { .key = DDOG_CHARSLICE_C("thread name"), .str = char_slice_from_ruby_string(thread_name) }; } + if (label_pos > max_label_count) { + rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + } + sample_thread( thread, state->sampling_buffer, state->recorder_instance, metric_values_slice, - (ddog_Slice_label) {.ptr = labels, .len = label_count}, + (ddog_Slice_label) {.ptr = labels, .len = label_pos}, type ); } From 1f6af60c59b59b8a055f06cb890131583a7871b2 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 28 Oct 2022 12:23:39 +0100 Subject: [PATCH 2/6] Add note about monotonic clock going backwards spuriously --- .../collectors_cpu_and_wall_time.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index 4f9960c9e98..ba3e3e16732 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -457,7 +457,9 @@ VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) { // We don't expect non-wall time to go backwards, so let's flag this as a bug if (gc_cpu_time_elapsed_ns < 0) rb_raise(rb_eRuntimeError, "BUG: Unexpected negative gc_cpu_time_elapsed_ns between samples"); // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards - // was a bug + // was a bug. + // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on + // https://github.com/DataDog/dd-trace-rb/pull/2336. 
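// Illustrative sketch (editorial aside, not part of this patch): the clamp on the next line assumes the
// wall-time delta was computed roughly as "finish - start" from the per-thread GC tracking fields, e.g.:
//
//   long gc_wall_time_elapsed_ns =
//     thread_context->gc_tracking.wall_time_at_finish_ns - thread_context->gc_tracking.wall_time_at_start_ns;
//
// Clamping negative values to zero (instead of raising, as is done for CPU time just above) means a spurious
// backwards clock reading shows up as a zero-length interval rather than crashing the profiler.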
  if (gc_wall_time_elapsed_ns < 0) gc_wall_time_elapsed_ns = 0;

   if (thread_context->gc_tracking.wall_time_at_start_ns == 0 && thread_context->gc_tracking.wall_time_at_finish_ns != 0) {
@@ -699,7 +701,9 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
   if (elapsed_time_ns < 0) {
     if (is_wall_time) {
       // Wall-time can actually go backwards (e.g. when the system clock gets set) so we can't assume time going backwards
-      // was a bug
+      // was a bug.
+      // @ivoanjo: I've also observed time going backwards spuriously on macOS, see discussion on
+      // https://github.com/DataDog/dd-trace-rb/pull/2336.
       elapsed_time_ns = 0;
     } else {
       // We don't expect non-wall time to go backwards, so let's flag this as a bug

From 42ddbadee7d46b96fa22d7bddab8f042a36f0bbc Mon Sep 17 00:00:00 2001
From: Ivo Anjo
Date: Fri, 28 Oct 2022 14:59:11 +0100
Subject: [PATCH 3/6] Document potential issues in signal handler function

Right now we don't particularly need to do anything more, but just in case we evolve it in the future, this note may come in handy.

---
 .../collectors_cpu_and_wall_time_worker.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
index 9bc576a376d..6f044985cfe 100644
--- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
+++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
@@ -280,6 +280,9 @@ static void block_sigprof_signal_handler_from_running_in_current_thread(void) {
   pthread_sigmask(SIG_BLOCK, &signals_to_block, NULL);
 }

+// NOTE: Remember that this will run in the thread and within the scope of user code, including user C code.
+// We need to be careful not to change any state that may be observed OR to restore it if we do. For instance, if anything
+// we do here can set `errno`, then we must be careful to restore the old `errno` after the fact.
 static void handle_sampling_signal(DDTRACE_UNUSED int _signal, DDTRACE_UNUSED siginfo_t *_info, DDTRACE_UNUSED void *_ucontext) {
   if (!ruby_thread_has_gvl_p()) {
     return; // Not safe to enqueue a sample from this thread

From b63650aebe4b14b40f6515290726edc364224c17 Mon Sep 17 00:00:00 2001
From: Ivo Anjo
Date: Thu, 27 Oct 2022 13:31:29 +0100
Subject: [PATCH 4/6] [PROF-4905] Implement support for code hotspots in CpuAndWallTime collector

**What does this PR do?**:

This PR reimplements support for the code hotspots feature (connecting traces to profiles) in the `CpuAndWallTime` collector. Although the implementation is different, the approach is conceptually the same as provided by `Datadog::Profiling::TraceIdentifiers::Ddtrace` for the old profiler codepaths.

Key constraints in this implementation are:

* No objects are created to get this information
* No Ruby code is called to get this information

These constraints are key to:

* Maintain performance on the `CpuAndWallTime` collector (we never get out of C code).
* Keep precise control of the Global VM Lock. Since we never get out of C, we take the sample in one pass. Calling into Ruby code may mean being "interrupted" in the middle of sampling, which would break the internal consistency of the profiler. (Assuming that we never lose the GVL makes the profiler simpler and more performant).
* Correctness of customer code.
Sampling happens from customer threads directly (using `rb_postponed_job_register_one`) so we don't want to call Ruby methods and potentially trigger side effects during a sample. * Memory profiling. We don't yet implement allocation or heap profiling, but it's coming soon and we don't want to allocate memory to track memory allocations. Thus, I chose to implement this feature by accessing the tracer context directly from the sampling process. Because all the information needed is available as instance variables of the objects, they can be read safely and within the needed constraints. This implementation also includes extensive checks to make sure that if tracing evolves or is not available, profiling safely skips getting the information. Furthermore, it includes an integration test with a real tracer, so that we'll immediately know when an incompatibility is introduced, so that the profiler can be updated. **Motivation**: Code hotspots is a key feature of our APM, allowing customers to see the profile for an individual trace. **Additional Notes**: This first implementation does not include support for endpoint aggregation; this will come in a separate PR. **How to test the change?**: This change includes code coverage. It can also be tested by enabling profiling and tracing on a Ruby app, and checking that the "Code Hotspots" tab on a generated trace is showing the profile for that trace. --- .../collectors_cpu_and_wall_time.c | 90 ++++++++++++++++- lib/datadog/core/configuration/components.rb | 3 +- .../profiling/collectors/cpu_and_wall_time.rb | 14 ++- .../collectors/cpu_and_wall_time_worker.rb | 3 +- .../core/configuration/components_spec.rb | 1 + .../collectors/cpu_and_wall_time_spec.rb | 96 ++++++++++++++++++- .../cpu_and_wall_time_worker_spec.rb | 4 +- 7 files changed, 200 insertions(+), 11 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index ba3e3e16732..d480a49cd24 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -64,6 +64,12 @@ #define DO_NOT_RAISE_ON_FAILURE false #define IS_WALL_TIME true #define IS_NOT_WALL_TIME false +#define MISSING_TRACER_CONTEXT_KEY 0 + +static ID at_active_trace_id; // id of :@active_trace in Ruby +static ID at_root_span_id; // id of :@root_span in Ruby +static ID at_active_span_id; // id of :@active_span in Ruby +static ID at_id_id; // id of :@id in Ruby // Contains state for a single CpuAndWallTime instance struct cpu_and_wall_time_collector_state { @@ -76,6 +82,10 @@ struct cpu_and_wall_time_collector_state { st_table *hash_map_per_thread_context; // Datadog::Profiling::StackRecorder instance VALUE recorder_instance; + // If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context, + // to enable code hotspots and endpoint aggregation. + // When not available, this is set to MISSING_TRACER_CONTEXT_KEY. + ID tracer_context_key; // Track how many regular samples we've taken. Does not include garbage collection samples. // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this // is not (just) a stat. @@ -110,12 +120,23 @@ struct per_thread_context { } gc_tracking; }; +// Used to correlate profiles with traces +struct trace_identifiers { + #define MAXIMUM_LENGTH_64_BIT_IDENTIFIER 21 // Why 21? 
2^64 => 20 digits + 1 for \0 + + bool valid; + ddog_CharSlice local_root_span_id; + ddog_CharSlice span_id; + char local_root_span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER]; + char span_id_buffer[MAXIMUM_LENGTH_64_BIT_IDENTIFIER]; +}; + static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr); static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr); static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument); static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument); static VALUE _native_new(VALUE klass); -static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames); +static VALUE _native_initialize(VALUE self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key); static VALUE _native_sample(VALUE self, VALUE collector_instance); static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance); static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance); @@ -143,6 +164,7 @@ static long cpu_time_now_ns(struct per_thread_context *thread_context); static long wall_time_now_ns(bool raise_on_failure); static long thread_id_for(VALUE thread); static VALUE _native_stats(VALUE self, VALUE collector_instance); +static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result); void collectors_cpu_and_wall_time_init(VALUE profiling_module) { VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors"); @@ -160,7 +182,7 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(collectors_cpu_and_wall_time_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 3); + rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_initialize", _native_initialize, 4); rb_define_singleton_method(collectors_cpu_and_wall_time_class, "_native_inspect", _native_inspect, 1); rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 1); rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1); @@ -169,6 +191,11 @@ void collectors_cpu_and_wall_time_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0); rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1); rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1); + + at_active_trace_id = rb_intern_const("@active_trace"); + at_root_span_id = rb_intern_const("@root_span"); + at_active_span_id = rb_intern_const("@active_span"); + at_id_id = rb_intern_const("@id"); } // This structure is used to define a Ruby object that stores a pointer to a struct cpu_and_wall_time_collector_state @@ -234,11 +261,12 @@ static VALUE _native_new(VALUE klass) { // "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s. 
st_init_numtable(); state->recorder_instance = Qnil; + state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY; return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state); } -static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames) { +static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE recorder_instance, VALUE max_frames, VALUE tracer_context_key) { struct cpu_and_wall_time_collector_state *state; TypedData_Get_Struct(collector_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state); @@ -250,6 +278,14 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE collector_inst // hash_map_per_thread_context is already initialized, nothing to do here state->recorder_instance = enforce_recorder_instance(recorder_instance); + if (RTEST(tracer_context_key)) { + ENFORCE_TYPE(tracer_context_key, T_SYMBOL); + // Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected. + // In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside + // the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC. + state->tracer_context_key = rb_to_id(tracer_context_key); + } + return Qtrue; } @@ -512,7 +548,8 @@ static void trigger_sample_for_thread( ) { int max_label_count = 1 + // thread id - 1; // thread name + 1 + // thread name + 2; // local root span id and span id ddog_Label labels[max_label_count]; int label_pos = 0; @@ -529,6 +566,14 @@ static void trigger_sample_for_thread( }; } + struct trace_identifiers trace_identifiers_result = {.valid = false}; + trace_identifiers_for(state, thread, &trace_identifiers_result); + + if (trace_identifiers_result.valid) { + labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .str = trace_identifiers_result.local_root_span_id}; + labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id}; + } + if (label_pos > max_label_count) { rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } @@ -601,6 +646,8 @@ static VALUE _native_inspect(DDTRACE_UNUSED VALUE _self, VALUE collector_instanc // Update this when modifying state struct rb_str_concat(result, rb_sprintf(" hash_map_per_thread_context=%"PRIsVALUE, per_thread_context_st_table_as_ruby_hash(state))); rb_str_concat(result, rb_sprintf(" recorder_instance=%"PRIsVALUE, state->recorder_instance)); + VALUE tracer_context_key = state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY ? 
Qnil : ID2SYM(state->tracer_context_key); + rb_str_concat(result, rb_sprintf(" tracer_context_key=%+"PRIsVALUE, tracer_context_key)); rb_str_concat(result, rb_sprintf(" sample_count=%u", state->sample_count)); rb_str_concat(result, rb_sprintf(" stats=%"PRIsVALUE, stats_as_ruby_hash(state))); @@ -772,3 +819,38 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) return stats_as_ruby_hash(state); } + +static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) { + if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return; + + VALUE current_context = rb_thread_local_aref(thread, state->tracer_context_key); + if (current_context == Qnil) return; + + VALUE active_trace = rb_ivar_get(current_context, at_active_trace_id /* @active_trace */); + if (active_trace == Qnil) return; + + VALUE root_span = rb_ivar_get(active_trace, at_root_span_id /* @root_span */); + VALUE active_span = rb_ivar_get(active_trace, at_active_span_id /* @active_span */); + if (root_span == Qnil || active_span == Qnil) return; + + VALUE numeric_local_root_span_id = rb_ivar_get(root_span, at_id_id /* @id */); + VALUE numeric_span_id = rb_ivar_get(active_span, at_id_id /* @id */); + if (numeric_local_root_span_id == Qnil || numeric_span_id == Qnil) return; + + unsigned long long local_root_span_id = NUM2ULL(numeric_local_root_span_id); + unsigned long long span_id = NUM2ULL(numeric_span_id); + + snprintf(trace_identifiers_result->local_root_span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", local_root_span_id); + snprintf(trace_identifiers_result->span_id_buffer, MAXIMUM_LENGTH_64_BIT_IDENTIFIER, "%llu", span_id); + + trace_identifiers_result->local_root_span_id = (ddog_CharSlice) { + .ptr = trace_identifiers_result->local_root_span_id_buffer, + .len = strlen(trace_identifiers_result->local_root_span_id_buffer) + }; + trace_identifiers_result->span_id = (ddog_CharSlice) { + .ptr = trace_identifiers_result->span_id_buffer, + .len = strlen(trace_identifiers_result->span_id_buffer) + }; + + trace_identifiers_result->valid = true; +} diff --git a/lib/datadog/core/configuration/components.rb b/lib/datadog/core/configuration/components.rb index dcda77f1935..c51f3b68e52 100644 --- a/lib/datadog/core/configuration/components.rb +++ b/lib/datadog/core/configuration/components.rb @@ -251,7 +251,8 @@ def build_profiler(settings, agent_settings, tracer) recorder = Datadog::Profiling::StackRecorder.new collector = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( recorder: recorder, - max_frames: settings.profiling.advanced.max_frames + max_frames: settings.profiling.advanced.max_frames, + tracer: tracer, ) else trace_identifiers_helper = Profiling::TraceIdentifiers::Helper.new( diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time.rb index 77a403c1753..254da6b3ac8 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time.rb @@ -11,8 +11,9 @@ module Collectors # # Methods prefixed with _native_ are implemented in `collectors_cpu_and_wall_time.c` class CpuAndWallTime - def initialize(recorder:, max_frames:) - self.class._native_initialize(self, recorder, max_frames) + def initialize(recorder:, max_frames:, tracer:) + tracer_context_key = safely_extract_context_key_from(tracer) + self.class._native_initialize(self, recorder, max_frames, tracer_context_key) end def 
inspect @@ -21,6 +22,15 @@ def inspect result[-1] = "#{self.class._native_inspect(self)}>" result end + + private + + def safely_extract_context_key_from(tracer) + tracer && + tracer.respond_to?(:provider) && + # NOTE: instance_variable_get always works, even on nil -- it just returns nil if the variable doesn't exist + tracer.provider.instance_variable_get(:@context).instance_variable_get(:@key) + end end end end diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 41854df4290..0f745634141 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -18,7 +18,8 @@ class CpuAndWallTimeWorker def initialize( recorder:, max_frames:, - cpu_and_wall_time_collector: CpuAndWallTime.new(recorder: recorder, max_frames: max_frames) + tracer:, + cpu_and_wall_time_collector: CpuAndWallTime.new(recorder: recorder, max_frames: max_frames, tracer: tracer) ) self.class._native_initialize(self, cpu_and_wall_time_collector) @worker_thread = nil diff --git a/spec/datadog/core/configuration/components_spec.rb b/spec/datadog/core/configuration/components_spec.rb index 4df349dbe3f..0ed623e40d1 100644 --- a/spec/datadog/core/configuration/components_spec.rb +++ b/spec/datadog/core/configuration/components_spec.rb @@ -1030,6 +1030,7 @@ expect(Datadog::Profiling::Collectors::CpuAndWallTimeWorker).to receive(:new).with( recorder: instance_of(Datadog::Profiling::StackRecorder), max_frames: settings.profiling.advanced.max_frames, + tracer: tracer, ) build_profiler diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb index 39c7fdd062f..1238d725b50 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_spec.rb @@ -41,8 +41,9 @@ end let(:samples) { samples_from_pprof(pprof_result) } let(:invalid_time) { -1 } + let(:tracer) { nil } - subject(:cpu_and_wall_time_collector) { described_class.new(recorder: recorder, max_frames: max_frames) } + subject(:cpu_and_wall_time_collector) { described_class.new(recorder: recorder, max_frames: max_frames, tracer: tracer) } after do [t1, t2, t3].each do |thread| @@ -275,6 +276,99 @@ def stats end end end + + describe 'code hotspots' do + let(:t1_sample) do + samples.find { |it| it.fetch(:labels).fetch(:'thread id') == t1.object_id.to_s } + end + + shared_examples_for 'samples without code hotspots information' do + it 'samples successfully' do + sample + + expect(t1_sample).to_not be_nil + end + + it 'does not include "local root span id" nor "span id" labels in the samples' do + sample + + found_labels = t1_sample.fetch(:labels).keys + + expect(found_labels).to_not include(:'local root span id') + expect(found_labels).to_not include(:'span id') + + expect(found_labels).to include(:'thread id') # Sanity check + end + end + + context 'when there is no tracer instance available' do + let(:tracer) { nil } + it_behaves_like 'samples without code hotspots information' + end + + context 'when tracer has no provider API' do + let(:tracer) { double('Tracer without provider API') } + it_behaves_like 'samples without code hotspots information' + end + + context 'when tracer provider is nil' do + let(:tracer) { double('Tracer with nil provider', provider: nil) } + it_behaves_like 'samples without code hotspots information' + end + + context 'when there is a tracer 
instance available' do + let(:tracer) { Datadog::Tracing.send(:tracer) } + + after { Datadog::Tracing.shutdown! } + + context 'when thread does not have a tracer context' do + # NOTE: Since t1 is newly created for this test, and never had any active trace, it won't have a context + it_behaves_like 'samples without code hotspots information' + end + + context 'when thread has a tracer context, but no trace is in progress' do + before { tracer.active_trace(t1) } # Trigger context setting + it_behaves_like 'samples without code hotspots information' + end + + context 'when thread has a tracer context, and a trace is in progress' do + let(:t1) do + Thread.new(ready_queue) do |ready_queue| + Datadog::Tracing.trace('profiler.test') do + Datadog::Tracing.trace('profiler.test.inner') do |span, trace| + @t1_span_id = span.id + @t1_local_root_span_id = trace.send(:root_span).id + ready_queue << true + sleep + end + end + end + end + + before do + # Sanity checks + expect(@t1_span_id).to be > 0 + expect(@t1_local_root_span_id).to be > 0 + expect(@t1_span_id).to_not be @t1_local_root_span_id + end + + it 'samples successfully' do + sample + + expect(t1_sample).to_not be_nil + end + + it 'includes "local root span id" and "span id" labels in the samples' do + sample + + expect(t1_sample.fetch(:labels)).to include( + :'local root span id' => @t1_local_root_span_id.to_s, + :'span id' => @t1_span_id.to_s, + ) + end + end + end + end end describe '#on_gc_start' do diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 05c9be0ab5c..c5f9a2130e6 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -8,7 +8,7 @@ let(:recorder) { Datadog::Profiling::StackRecorder.new } - subject(:cpu_and_wall_time_worker) { described_class.new(recorder: recorder, max_frames: 400) } + subject(:cpu_and_wall_time_worker) { described_class.new(recorder: recorder, max_frames: 400, tracer: nil) } describe '.new' do it 'creates the garbage collection tracepoint in the disabled state' do @@ -47,7 +47,7 @@ allow(Datadog.logger).to receive(:warn) - another_instance = described_class.new(recorder: Datadog::Profiling::StackRecorder.new, max_frames: 400) + another_instance = described_class.new(recorder: Datadog::Profiling::StackRecorder.new, max_frames: 400, tracer: nil) another_instance.start exception = try_wait_until(backoff: 0.01) { another_instance.send(:failure_exception) } From bb95c1c737f237a94d2c76e1856d94a142e358ca Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 1 Nov 2022 09:25:54 +0000 Subject: [PATCH 5/6] Add note about validation of `label_pos` --- .../collectors_cpu_and_wall_time.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index d480a49cd24..c5588792280 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -574,6 +574,10 @@ static void trigger_sample_for_thread( labels[label_pos++] = (ddog_Label) {.key = DDOG_CHARSLICE_C("span id"), .str = trace_identifiers_result.span_id}; } + // The number of times `label_pos++` shows up in this function needs to match `max_label_count`. 
To avoid "oops I + // forgot to update max_label_count" in the future, we've also added this validation. + // @ivoanjo: I wonder if C compilers are smart enough to statically prove when this check never triggers happens and + // remove it entirely. if (label_pos > max_label_count) { rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } From c7f6401bdb066fa37c188509cf000c7ebbb90be2 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 1 Nov 2022 09:41:52 +0000 Subject: [PATCH 6/6] Document that `trace_identifiers_for` gets called while holding the GVL --- .../collectors_cpu_and_wall_time.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c index c5588792280..0d41fd02f71 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time.c @@ -824,6 +824,7 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE _self, VALUE collector_instance) return stats_as_ruby_hash(state); } +// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this. static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result) { if (state->tracer_context_key == MISSING_TRACER_CONTEXT_KEY) return;