#include <ruby.h>
#include "collectors_thread_context.h"
#include "clock_id.h"
#include "collectors_stack.h"
#include "collectors_gc_profiling_helper.h"
#include "helpers.h"
#include "libdatadog_helpers.h"
#include "private_vm_api_access.h"
#include "stack_recorder.h"
#include "time_helpers.h"
// Used to trigger sampling of threads, based on external "events", such as:
// * periodic timer for cpu-time and wall-time
// * VM garbage collection events
// * VM object allocation events
//
// This file implements the native bits of the Datadog::Profiling::Collectors::ThreadContext class
//
// Triggering of this component (e.g. watching for the above "events") is implemented by Collectors::CpuAndWallTimeWorker.
// ---
// ## Tracking of cpu-time and wall-time spent during garbage collection
//
// This feature works by having a special state that a thread can be in: doing garbage collection. This state is
// tracked inside the thread's `per_thread_context.gc_tracking` data and managed by the three functions listed below,
// which get called by the `Collectors::CpuAndWallTimeWorker` at very specific times in the VM lifetime.
//
// * `thread_context_collector_on_gc_start`: Called at the very beginning of the garbage collection process.
// The internal VM `during_gc` flag is set to `true`, but Ruby has not done any work yet.
// * `thread_context_collector_on_gc_finish`: Called at the very end of the garbage collection process.
// The internal VM `during_gc` flag is still set to `true`, but all the work has been done.
// * `thread_context_collector_sample_after_gc`: Called shortly after the garbage collection process.
// The internal VM `during_gc` flag is set to `false`.
//
// Inside this component, here's what happens inside those three functions:
//
// When `thread_context_collector_on_gc_start` gets called, the current cpu and wall-time get recorded to the thread
// context: `cpu_time_at_gc_start_ns` and `wall_time_at_gc_start_ns`.
//
// While `cpu_time_at_gc_start_ns` is set, regular samples (if any) do not account for any cpu-time that passes
// after this timestamp. The idea is that this cpu-time will be blamed separately on GC, and not on the user thread.
// Wall-time accounting is not affected by this (e.g. we still record 60 seconds every 60 seconds).
//
// (Regular samples can still account for the cpu-time between the previous sample and the start of GC.)
//
// When `thread_context_collector_on_gc_finish` gets called, the cpu-time and wall-time spent during GC gets recorded
// into the global gc_tracking structure, and further samples are not affected. (The `cpu_time_at_previous_sample_ns`
// of the thread that did GC also gets adjusted to avoid double-accounting.)
//
// Finally, when `thread_context_collector_sample_after_gc` gets called, a sample gets recorded with a stack having
// a single placeholder `Garbage Collection` frame. This sample gets
// assigned the cpu-time and wall-time that was recorded between calls to `on_gc_start` and `on_gc_finish`, as well
// as metadata for the last GC.
//
// Note that the Ruby GC does not usually do all of the GC work in one go. Instead, it breaks it up into smaller steps
// so that the application can keep doing user work in between GC steps.
// The `on_gc_start` / `on_gc_finish` pair will trigger each time the VM executes one of these smaller steps, and on a
// benchmark that executes `Object.new` in a loop, I measured more than 50k of these steps per second (!!).
// Creating this many events, one for every GC step, is a lot of overhead, so instead `on_gc_finish` coalesces time
// spent in GC and only flushes it at most every 10 ms or every complete GC collection. This reduces the amount of
// individual GC events we need to record. We use the latest GC metadata for this event, reflecting the last GC that
// happened in the coalesced period.
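//
// As an illustrative (made-up) example: if minor GC steps finish at t = 2ms, 5ms, 8ms and 12ms after the last flushed
// GC event, `on_gc_finish` accumulates the cpu/wall-time of each step but only requests a flush after the 12ms step
// (the first one past the 10 ms threshold), and `sample_after_gc` then emits a single "Garbage Collection" sample
// covering all four steps, tagged with the metadata of the last one.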
//
// In an earlier attempt at implementing this functionality (https://github.com/DataDog/dd-trace-rb/pull/2308), we
// discovered that we needed to factor the sampling work away from `thread_context_collector_on_gc_finish` and into a
// separate `thread_context_collector_sample_after_gc` because (as documented in more detail below),
// `sample_after_gc` could trigger memory allocation on rare occasions (usually exceptions), which is actually not
// allowed to happen during Ruby's garbage collection start/finish hooks.
// ---
#define THREAD_ID_LIMIT_CHARS 44 // Why 44? "#{2**64} (#{2**64})".size + 1 for \0
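// (For reference: 2**64 is 18446744073709551616, i.e. 20 digits, so "18446744073709551616 (18446744073709551616)"
// is 20 + 2 + 20 + 1 = 43 characters, plus 1 for the trailing '\0'.)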
#define THREAD_INVOKE_LOCATION_LIMIT_CHARS 512
#define IS_WALL_TIME true
#define IS_NOT_WALL_TIME false
#define MISSING_TRACER_CONTEXT_KEY 0
#define TIME_BETWEEN_GC_EVENTS_NS MILLIS_AS_NS(10)
// This is used as a placeholder to mark threads that are allowed to be profiled (enabled)
// (e.g. to avoid trying to gvl profile threads that are not from the main Ractor)
// and for which there's no data yet
#define GVL_WAITING_ENABLED_EMPTY RUBY_FIXNUM_MAX
static ID at_active_span_id; // id of :@active_span in Ruby
static ID at_active_trace_id; // id of :@active_trace in Ruby
static ID at_id_id; // id of :@id in Ruby
static ID at_resource_id; // id of :@resource in Ruby
static ID at_root_span_id; // id of :@root_span in Ruby
static ID at_type_id; // id of :@type in Ruby
static ID at_otel_values_id; // id of :@otel_values in Ruby
static ID at_parent_span_id_id; // id of :@parent_span_id in Ruby
static ID at_datadog_trace_id; // id of :@datadog_trace in Ruby
// Used to support reading trace identifiers from the opentelemetry Ruby library when the ddtrace gem tracing
// integration is NOT in use.
static ID at_span_id_id; // id of :@span_id in Ruby
static ID at_trace_id_id; // id of :@trace_id in Ruby
static ID at_entries_id; // id of :@entries in Ruby
static ID at_context_id; // id of :@context in Ruby
static ID at_kind_id; // id of :@kind in Ruby
static ID at_name_id; // id of :@name in Ruby
static ID server_id; // id of :server in Ruby
static ID otel_context_storage_id; // id of :__opentelemetry_context_storage__ in Ruby
// This is used by `thread_context_collector_on_gvl_running`. Because when that method gets called we're not sure if
// it's safe to access the state of the thread context collector, we store this setting as a global value. This does
// mean this setting is shared among all thread context collectors, and thus it's "last writer wins".
// In production this should not be a problem: there should only be one profiler, which is the last one created,
// and that'll be the one that last wrote this setting.
static uint32_t global_waiting_for_gvl_threshold_ns = MILLIS_AS_NS(10);
typedef enum { OTEL_CONTEXT_ENABLED_FALSE, OTEL_CONTEXT_ENABLED_ONLY, OTEL_CONTEXT_ENABLED_BOTH } otel_context_enabled;
// Contains state for a single ThreadContext instance
struct thread_context_collector_state {
// Note: Places in this file that usually need to be changed when this struct is changed are tagged with
// "Update this when modifying state struct"
// Required by Datadog::Profiling::Collectors::Stack as a scratch buffer during sampling
ddog_prof_Location *locations;
uint16_t max_frames;
// Hashmap <Thread Object, struct per_thread_context>
st_table *hash_map_per_thread_context;
// Datadog::Profiling::StackRecorder instance
VALUE recorder_instance;
// If the tracer is available and enabled, this will be the fiber-local symbol for accessing its running context,
// to enable code hotspots and endpoint aggregation.
// When not available, this is set to MISSING_TRACER_CONTEXT_KEY.
ID tracer_context_key;
// Track how many regular samples we've taken. Does not include garbage collection samples.
// Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
// is not (just) a stat.
unsigned int sample_count;
// Reusable array to get list of threads
VALUE thread_list_buffer;
// Used to omit endpoint names (retrieved from tracer) from collected data
bool endpoint_collection_enabled;
// Used to omit timestamps / timeline events from collected data
bool timeline_enabled;
// Used to control context collection
otel_context_enabled otel_context_enabled;
// Used when calling monotonic_to_system_epoch_ns
monotonic_to_system_epoch_state time_converter_state;
// Used to identify the main thread, to give it a fallback name
VALUE main_thread;
// Used when extracting trace identifiers from otel spans. Lazily initialized.
// Qtrue serves as a marker that we've not yet extracted it; when we try to extract it, we set it to an object if
// successful and to Qnil if not.
VALUE otel_current_span_key;
struct stats {
// Track how many garbage collection samples we've taken.
unsigned int gc_samples;
// See thread_context_collector_on_gc_start for details
unsigned int gc_samples_missed_due_to_missing_context;
} stats;
struct {
unsigned long accumulated_cpu_time_ns;
unsigned long accumulated_wall_time_ns;
long wall_time_at_previous_gc_ns; // Will be INVALID_TIME unless there's accumulated time above
long wall_time_at_last_flushed_gc_event_ns; // Starts at 0 and then will always be valid
} gc_tracking;
};
// Tracks per-thread state
struct per_thread_context {
sampling_buffer *sampling_buffer;
char thread_id[THREAD_ID_LIMIT_CHARS];
ddog_CharSlice thread_id_char_slice;
char thread_invoke_location[THREAD_INVOKE_LOCATION_LIMIT_CHARS];
ddog_CharSlice thread_invoke_location_char_slice;
thread_cpu_time_id thread_cpu_time_id;
long cpu_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized or if getting it fails for another reason
long wall_time_at_previous_sample_ns; // Can be INVALID_TIME until initialized
struct {
// Both of these fields are set by on_gc_start and kept until on_gc_finish is called.
// Outside of this window, they will be INVALID_TIME.
long cpu_time_at_start_ns;
long wall_time_at_start_ns;
} gc_tracking;
};
// Used to correlate profiles with traces
struct trace_identifiers {
bool valid;
uint64_t local_root_span_id;
uint64_t span_id;
VALUE trace_endpoint;
};
struct otel_span {
VALUE span;
VALUE span_id;
VALUE trace_id;
};
static void thread_context_collector_typed_data_mark(void *state_ptr);
static void thread_context_collector_typed_data_free(void *state_ptr);
static int hash_map_per_thread_context_mark(st_data_t key_thread, st_data_t _value, st_data_t _argument);
static int hash_map_per_thread_context_free_values(st_data_t _thread, st_data_t value_per_thread_context, st_data_t _argument);
static VALUE _native_new(VALUE klass);
static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self);
static VALUE _native_sample(VALUE self, VALUE collector_instance, VALUE profiler_overhead_stack_thread);
static VALUE _native_on_gc_start(VALUE self, VALUE collector_instance);
static VALUE _native_on_gc_finish(VALUE self, VALUE collector_instance);
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state);
static void update_metrics_and_sample(
struct thread_context_collector_state *state,
VALUE thread_being_sampled,
VALUE stack_from_thread,
struct per_thread_context *thread_context,
sampling_buffer* sampling_buffer,
long current_cpu_time_ns,
long current_monotonic_wall_time_ns
);
static void trigger_sample_for_thread(
struct thread_context_collector_state *state,
VALUE thread,
VALUE stack_from_thread,
struct per_thread_context *thread_context,
sampling_buffer* sampling_buffer,
sample_values values,
long current_monotonic_wall_time_ns,
ddog_CharSlice *ruby_vm_type,
ddog_CharSlice *class_name,
bool is_gvl_waiting_state,
bool is_safe_to_allocate_objects
);
static VALUE _native_thread_list(VALUE self);
static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state);
static struct per_thread_context *get_context_for(VALUE thread, struct thread_context_collector_state *state);
static void initialize_context(VALUE thread, struct per_thread_context *thread_context, struct thread_context_collector_state *state);
static void free_context(struct per_thread_context* thread_context);
static VALUE _native_inspect(VALUE self, VALUE collector_instance);
static VALUE per_thread_context_st_table_as_ruby_hash(struct thread_context_collector_state *state);
static int per_thread_context_as_ruby_hash(st_data_t key_thread, st_data_t value_context, st_data_t result_hash);
static VALUE stats_as_ruby_hash(struct thread_context_collector_state *state);
static VALUE gc_tracking_as_ruby_hash(struct thread_context_collector_state *state);
static void remove_context_for_dead_threads(struct thread_context_collector_state *state);
static int remove_if_dead_thread(st_data_t key_thread, st_data_t value_context, st_data_t _argument);
static VALUE _native_per_thread_context(VALUE self, VALUE collector_instance);
static long update_time_since_previous_sample(long *time_at_previous_sample_ns, long current_time_ns, long gc_start_time_ns, bool is_wall_time);
static long cpu_time_now_ns(struct per_thread_context *thread_context);
static long thread_id_for(VALUE thread);
static VALUE _native_stats(VALUE self, VALUE collector_instance);
static VALUE _native_gc_tracking(VALUE self, VALUE collector_instance);
static void trace_identifiers_for(
struct thread_context_collector_state *state,
VALUE thread,
struct trace_identifiers *trace_identifiers_result,
bool is_safe_to_allocate_objects
);
static bool should_collect_resource(VALUE root_span);
static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
static VALUE thread_list(struct thread_context_collector_state *state);
static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object);
static VALUE _native_new_empty_thread(VALUE self);
static ddog_CharSlice ruby_value_type_to_class_name(enum ruby_value_type type);
static void ddtrace_otel_trace_identifiers_for(
struct thread_context_collector_state *state,
VALUE *active_trace,
VALUE *root_span,
VALUE *numeric_span_id,
VALUE active_span,
VALUE otel_values,
bool is_safe_to_allocate_objects
);
static VALUE _native_sample_skipped_allocation_samples(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE skipped_samples);
static bool handle_gvl_waiting(
struct thread_context_collector_state *state,
VALUE thread_being_sampled,
VALUE stack_from_thread,
struct per_thread_context *thread_context,
sampling_buffer* sampling_buffer,
long current_cpu_time_ns
);
static VALUE _native_on_gvl_waiting(DDTRACE_UNUSED VALUE self, VALUE thread);
static VALUE _native_gvl_waiting_at_for(DDTRACE_UNUSED VALUE self, VALUE thread);
static VALUE _native_on_gvl_running(DDTRACE_UNUSED VALUE self, VALUE thread);
static VALUE _native_sample_after_gvl_running(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread);
static VALUE _native_apply_delta_to_cpu_time_at_previous_sample_ns(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE thread, VALUE delta_ns);
static void otel_without_ddtrace_trace_identifiers_for(
struct thread_context_collector_state *state,
VALUE thread,
struct trace_identifiers *trace_identifiers_result,
bool is_safe_to_allocate_objects
);
static struct otel_span otel_span_from(VALUE otel_context, VALUE otel_current_span_key);
static uint64_t otel_span_id_to_uint(VALUE otel_span_id);
void collectors_thread_context_init(VALUE profiling_module) {
VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
VALUE collectors_thread_context_class = rb_define_class_under(collectors_module, "ThreadContext", rb_cObject);
// Hosts methods used for testing the native code using RSpec
VALUE testing_module = rb_define_module_under(collectors_thread_context_class, "Testing");
// Instances of the ThreadContext class are "TypedData" objects.
// "TypedData" objects are special objects in the Ruby VM that can wrap C structs.
// In this case, it wraps the thread_context_collector_state.
//
// Because Ruby doesn't know how to initialize native-level structs, we MUST override the allocation function for objects
// of this class so that we can manage this part. Not overriding or disabling the allocation function is a common
// gotcha for "TypedData" objects that can very easily lead to VM crashes, see for instance
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
rb_define_alloc_func(collectors_thread_context_class, _native_new);
rb_define_singleton_method(collectors_thread_context_class, "_native_initialize", _native_initialize, -1);
rb_define_singleton_method(collectors_thread_context_class, "_native_inspect", _native_inspect, 1);
rb_define_singleton_method(collectors_thread_context_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
rb_define_singleton_method(testing_module, "_native_sample", _native_sample, 2);
rb_define_singleton_method(testing_module, "_native_sample_allocation", _native_sample_allocation, 3);
rb_define_singleton_method(testing_module, "_native_on_gc_start", _native_on_gc_start, 1);
rb_define_singleton_method(testing_module, "_native_on_gc_finish", _native_on_gc_finish, 1);
rb_define_singleton_method(testing_module, "_native_sample_after_gc", _native_sample_after_gc, 2);
rb_define_singleton_method(testing_module, "_native_thread_list", _native_thread_list, 0);
rb_define_singleton_method(testing_module, "_native_per_thread_context", _native_per_thread_context, 1);
rb_define_singleton_method(testing_module, "_native_stats", _native_stats, 1);
rb_define_singleton_method(testing_module, "_native_gc_tracking", _native_gc_tracking, 1);
rb_define_singleton_method(testing_module, "_native_new_empty_thread", _native_new_empty_thread, 0);
rb_define_singleton_method(testing_module, "_native_sample_skipped_allocation_samples", _native_sample_skipped_allocation_samples, 2);
#ifndef NO_GVL_INSTRUMENTATION
rb_define_singleton_method(testing_module, "_native_on_gvl_waiting", _native_on_gvl_waiting, 1);
rb_define_singleton_method(testing_module, "_native_gvl_waiting_at_for", _native_gvl_waiting_at_for, 1);
rb_define_singleton_method(testing_module, "_native_on_gvl_running", _native_on_gvl_running, 1);
rb_define_singleton_method(testing_module, "_native_sample_after_gvl_running", _native_sample_after_gvl_running, 2);
rb_define_singleton_method(testing_module, "_native_apply_delta_to_cpu_time_at_previous_sample_ns", _native_apply_delta_to_cpu_time_at_previous_sample_ns, 3);
#endif
at_active_span_id = rb_intern_const("@active_span");
at_active_trace_id = rb_intern_const("@active_trace");
at_id_id = rb_intern_const("@id");
at_resource_id = rb_intern_const("@resource");
at_root_span_id = rb_intern_const("@root_span");
at_type_id = rb_intern_const("@type");
at_otel_values_id = rb_intern_const("@otel_values");
at_parent_span_id_id = rb_intern_const("@parent_span_id");
at_datadog_trace_id = rb_intern_const("@datadog_trace");
at_span_id_id = rb_intern_const("@span_id");
at_trace_id_id = rb_intern_const("@trace_id");
at_entries_id = rb_intern_const("@entries");
at_context_id = rb_intern_const("@context");
at_kind_id = rb_intern_const("@kind");
at_name_id = rb_intern_const("@name");
server_id = rb_intern_const("server");
otel_context_storage_id = rb_intern_const("__opentelemetry_context_storage__");
#ifndef NO_GVL_INSTRUMENTATION
// This will raise if Ruby already ran out of thread-local keys
gvl_profiling_init();
#endif
gc_profiling_init();
}
// This structure is used to define a Ruby object that stores a pointer to a struct thread_context_collector_state
// See also https://github.com/ruby/ruby/blob/master/doc/extension.rdoc for how this works
static const rb_data_type_t thread_context_collector_typed_data = {
.wrap_struct_name = "Datadog::Profiling::Collectors::ThreadContext",
.function = {
.dmark = thread_context_collector_typed_data_mark,
.dfree = thread_context_collector_typed_data_free,
.dsize = NULL, // We don't track profile memory usage (although it'd be cool if we did!)
//.dcompact = NULL, // FIXME: Add support for compaction
},
.flags = RUBY_TYPED_FREE_IMMEDIATELY
};
// This function is called by the Ruby GC to give us a chance to mark any Ruby objects that we're holding on to,
// so that they don't get garbage collected
static void thread_context_collector_typed_data_mark(void *state_ptr) {
struct thread_context_collector_state *state = (struct thread_context_collector_state *) state_ptr;
// Update this when modifying state struct
rb_gc_mark(state->recorder_instance);
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
rb_gc_mark(state->thread_list_buffer);
rb_gc_mark(state->main_thread);
rb_gc_mark(state->otel_current_span_key);
}
static void thread_context_collector_typed_data_free(void *state_ptr) {
struct thread_context_collector_state *state = (struct thread_context_collector_state *) state_ptr;
// Update this when modifying state struct
// Important: Remember that we're only guaranteed to see here what's been set in _native_new; i.e. pointers that
// were set to NULL there may still be NULL here.
if (state->locations != NULL) ruby_xfree(state->locations);
// Free each entry in the map
st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_free_values, 0 /* unused */);
// ...and then the map
st_free_table(state->hash_map_per_thread_context);
ruby_xfree(state);
}
// Mark Ruby thread references we keep as keys in hash_map_per_thread_context
static int hash_map_per_thread_context_mark(st_data_t key_thread, DDTRACE_UNUSED st_data_t _value, DDTRACE_UNUSED st_data_t _argument) {
VALUE thread = (VALUE) key_thread;
rb_gc_mark(thread);
return ST_CONTINUE;
}
// Used to clear each of the per_thread_contexts inside the hash_map_per_thread_context
static int hash_map_per_thread_context_free_values(DDTRACE_UNUSED st_data_t _thread, st_data_t value_per_thread_context, DDTRACE_UNUSED st_data_t _argument) {
struct per_thread_context *thread_context = (struct per_thread_context*) value_per_thread_context;
free_context(thread_context);
return ST_CONTINUE;
}
static VALUE _native_new(VALUE klass) {
struct thread_context_collector_state *state = ruby_xcalloc(1, sizeof(struct thread_context_collector_state));
// Note: Any exceptions raised from this point until the TypedData_Wrap_Struct call will lead to the state memory
// being leaked.
// Update this when modifying state struct
state->locations = NULL;
state->max_frames = 0;
state->hash_map_per_thread_context =
// "numtable" is an awful name, but TL;DR it's what should be used when keys are `VALUE`s.
st_init_numtable();
state->recorder_instance = Qnil;
state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
VALUE thread_list_buffer = rb_ary_new();
state->thread_list_buffer = thread_list_buffer;
state->endpoint_collection_enabled = true;
state->timeline_enabled = true;
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
VALUE main_thread = rb_thread_main();
state->main_thread = main_thread;
state->otel_current_span_key = Qtrue;
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = 0;
// Note: Remember to also keep any newly-allocated objects that get stored in the state on the stack + mark them with
// RB_GC_GUARD -- otherwise it's possible for a GC to run and, since the instance representing the state does not yet
// exist, such objects would not get marked.
VALUE instance = TypedData_Wrap_Struct(klass, &thread_context_collector_typed_data, state);
RB_GC_GUARD(thread_list_buffer);
RB_GC_GUARD(main_thread); // Arguably not needed, but perhaps it could be moved in some future Ruby release?
return instance;
}
static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _self) {
VALUE options;
rb_scan_args(argc, argv, "0:", &options);
if (options == Qnil) options = rb_hash_new();
VALUE self_instance = rb_hash_fetch(options, ID2SYM(rb_intern("self_instance")));
VALUE recorder_instance = rb_hash_fetch(options, ID2SYM(rb_intern("recorder")));
VALUE max_frames = rb_hash_fetch(options, ID2SYM(rb_intern("max_frames")));
VALUE tracer_context_key = rb_hash_fetch(options, ID2SYM(rb_intern("tracer_context_key")));
VALUE endpoint_collection_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("endpoint_collection_enabled")));
VALUE timeline_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("timeline_enabled")));
VALUE waiting_for_gvl_threshold_ns = rb_hash_fetch(options, ID2SYM(rb_intern("waiting_for_gvl_threshold_ns")));
VALUE otel_context_enabled = rb_hash_fetch(options, ID2SYM(rb_intern("otel_context_enabled")));
ENFORCE_TYPE(max_frames, T_FIXNUM);
ENFORCE_BOOLEAN(endpoint_collection_enabled);
ENFORCE_BOOLEAN(timeline_enabled);
ENFORCE_TYPE(waiting_for_gvl_threshold_ns, T_FIXNUM);
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
// Update this when modifying state struct
state->max_frames = sampling_buffer_check_max_frames(NUM2INT(max_frames));
state->locations = ruby_xcalloc(state->max_frames, sizeof(ddog_prof_Location));
// hash_map_per_thread_context is already initialized, nothing to do here
state->recorder_instance = enforce_recorder_instance(recorder_instance);
state->endpoint_collection_enabled = (endpoint_collection_enabled == Qtrue);
state->timeline_enabled = (timeline_enabled == Qtrue);
if (otel_context_enabled == Qfalse || otel_context_enabled == Qnil) {
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_FALSE;
} else if (otel_context_enabled == ID2SYM(rb_intern("only"))) {
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_ONLY;
} else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_BOTH;
} else {
rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
}
global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
if (RTEST(tracer_context_key)) {
ENFORCE_TYPE(tracer_context_key, T_SYMBOL);
// Note about rb_to_id and dynamic symbols: calling `rb_to_id` prevents symbols from ever being garbage collected.
// In this case, we can't really escape this because as of this writing, ruby master still calls `rb_to_id` inside
// the implementation of Thread#[]= so any symbol that gets used as a key there will already be prevented from GC.
state->tracer_context_key = rb_to_id(tracer_context_key);
}
return Qtrue;
}
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread) {
if (!is_thread_alive(profiler_overhead_stack_thread)) rb_raise(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive");
thread_context_collector_sample(collector_instance, monotonic_wall_time_now_ns(RAISE_ON_FAILURE), profiler_overhead_stack_thread);
return Qtrue;
}
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_on_gc_start(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
thread_context_collector_on_gc_start(collector_instance);
return Qtrue;
}
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_on_gc_finish(DDTRACE_UNUSED VALUE self, VALUE collector_instance) {
(void) !thread_context_collector_on_gc_finish(collector_instance);
return Qtrue;
}
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_sample_after_gc(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE reset_monotonic_to_system_state) {
ENFORCE_BOOLEAN(reset_monotonic_to_system_state);
struct thread_context_collector_state *state;
TypedData_Get_Struct(collector_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
if (reset_monotonic_to_system_state == Qtrue) {
state->time_converter_state = (monotonic_to_system_epoch_state) MONOTONIC_TO_SYSTEM_EPOCH_INITIALIZER;
}
thread_context_collector_sample_after_gc(collector_instance);
return Qtrue;
}
// This function gets called from the Collectors::CpuAndWallTimeWorker to trigger the actual sampling.
//
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
// Assumption 3: This function IS NOT called from a signal handler. This function is not async-signal-safe.
// Assumption 4: This function IS NOT called in a reentrant way.
// Assumption 5: This function is called from the main Ractor (if Ruby has support for Ractors).
//
// The `profiler_overhead_stack_thread` is used to attribute the profiler overhead to a stack borrowed from a different thread
// (belonging to ddtrace), so that the overhead is visible in the profile rather than blamed on user code.
void thread_context_collector_sample(VALUE self_instance, long current_monotonic_wall_time_ns, VALUE profiler_overhead_stack_thread) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
VALUE current_thread = rb_thread_current();
struct per_thread_context *current_thread_context = get_or_create_context_for(current_thread, state);
long cpu_time_at_sample_start_for_current_thread = cpu_time_now_ns(current_thread_context);
VALUE threads = thread_list(state);
const long thread_count = RARRAY_LEN(threads);
for (long i = 0; i < thread_count; i++) {
VALUE thread = RARRAY_AREF(threads, i);
struct per_thread_context *thread_context = get_or_create_context_for(thread, state);
// We account for cpu-time for the current thread in a different way -- we use the cpu-time at sampling start, to avoid
// blaming the time the profiler took on whatever's running on the thread right now
long current_cpu_time_ns = thread != current_thread ? cpu_time_now_ns(thread_context) : cpu_time_at_sample_start_for_current_thread;
update_metrics_and_sample(
state,
/* thread_being_sampled: */ thread,
/* stack_from_thread: */ thread,
thread_context,
thread_context->sampling_buffer,
current_cpu_time_ns,
current_monotonic_wall_time_ns
);
}
state->sample_count++;
// TODO: This seems somewhat overkill and inefficient to do often; right now we just do it every few samples
// but there's probably a better way to do this if we actually track when threads finish
if (state->sample_count % 100 == 0) remove_context_for_dead_threads(state);
update_metrics_and_sample(
state,
/* thread_being_sampled: */ current_thread,
/* stack_from_thread: */ profiler_overhead_stack_thread,
current_thread_context,
// Here we use the overhead thread's sampling buffer so as to not invalidate the cache in the buffer of the thread being sampled
get_or_create_context_for(profiler_overhead_stack_thread, state)->sampling_buffer,
cpu_time_now_ns(current_thread_context),
monotonic_wall_time_now_ns(RAISE_ON_FAILURE)
);
}
static void update_metrics_and_sample(
struct thread_context_collector_state *state,
VALUE thread_being_sampled,
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
struct per_thread_context *thread_context,
sampling_buffer* sampling_buffer,
long current_cpu_time_ns,
long current_monotonic_wall_time_ns
) {
bool is_gvl_waiting_state =
handle_gvl_waiting(state, thread_being_sampled, stack_from_thread, thread_context, sampling_buffer, current_cpu_time_ns);
// Don't assign/update cpu during "Waiting for GVL"
long cpu_time_elapsed_ns = is_gvl_waiting_state ? 0 : update_time_since_previous_sample(
&thread_context->cpu_time_at_previous_sample_ns,
current_cpu_time_ns,
thread_context->gc_tracking.cpu_time_at_start_ns,
IS_NOT_WALL_TIME
);
long wall_time_elapsed_ns = update_time_since_previous_sample(
&thread_context->wall_time_at_previous_sample_ns,
current_monotonic_wall_time_ns,
// We explicitly pass in `INVALID_TIME` as an argument for `gc_start_time_ns` here because we don't want wall-time
// accounting to change during GC.
// E.g. if 60 seconds pass in the real world, 60 seconds of wall-time are recorded, regardless of the thread doing
// GC or not.
INVALID_TIME,
IS_WALL_TIME
);
// A thread enters "Waiting for GVL", well, as the name implies, without the GVL.
//
// As a consequence, it's possible that a thread enters "Waiting for GVL" in parallel with the current thread working
// on sampling, and thus for the `current_monotonic_wall_time_ns` (which is recorded at the start of sampling)
// to be < the time at which we started Waiting for GVL.
//
// All together, this means that when `handle_gvl_waiting` creates an extra sample (see comments on that function for
// what the extra sample is), it's possible that there's no more wall-time to be assigned.
// Thus, in this case, we don't want to produce a sample representing Waiting for GVL with a wall-time of 0, and
// thus we skip creating such a sample.
if (is_gvl_waiting_state && wall_time_elapsed_ns == 0) return;
// ...you may also wonder: is there any other situation where it makes sense to produce a sample with
// wall_time_elapsed_ns == 0? I believe that yes, because the sample still includes a timestamp and a stack, but we
// may revisit/change our minds on this in the future.
trigger_sample_for_thread(
state,
thread_being_sampled,
stack_from_thread,
thread_context,
sampling_buffer,
(sample_values) {.cpu_time_ns = cpu_time_elapsed_ns, .cpu_or_wall_samples = 1, .wall_time_ns = wall_time_elapsed_ns},
current_monotonic_wall_time_ns,
NULL,
NULL,
is_gvl_waiting_state,
/* is_safe_to_allocate_objects: */ true // We're called from a context that's safe to run any regular code, including allocations
);
}
// This function gets called when Ruby is about to start running the Garbage Collector on the current thread.
// It updates the per_thread_context of the current thread to include the current cpu/wall times, to be used to later
// create an event including the cpu/wall time spent in garbage collector work.
//
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
// *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
//
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
void thread_context_collector_on_gc_start(VALUE self_instance) {
struct thread_context_collector_state *state;
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return;
// This should never fail if the above check passes
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
// If there was no previously-existing context for this thread, we won't allocate one (see safety). For now we just drop
// the GC sample, under the assumption that "a thread that is so new that we never sampled it even once before it triggers
// GC" is a rare enough case that we can just ignore it.
// We can always improve this later if we find that this happens often (and we have the counter to help us figure that out)!
if (thread_context == NULL) {
state->stats.gc_samples_missed_due_to_missing_context++;
return;
}
// Here we record the wall-time first and in on_gc_finish we record it second to try to avoid having wall-time be slightly < cpu-time
thread_context->gc_tracking.wall_time_at_start_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
thread_context->gc_tracking.cpu_time_at_start_ns = cpu_time_now_ns(thread_context);
}
// This function gets called when Ruby has finished running the Garbage Collector on the current thread.
// It records the cpu/wall-time observed during GC, which will be used to later
// create an event including the cpu/wall time spent from the start of garbage collector work until now.
//
// Safety: This function gets called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
// *NO ALLOCATION* is allowed. This function, and any functions it calls, must never trigger memory or object allocation.
// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
//
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
// Assumption 2: This function is called from the main Ractor (if Ruby has support for Ractors).
__attribute__((warn_unused_result))
bool thread_context_collector_on_gc_finish(VALUE self_instance) {
struct thread_context_collector_state *state;
if (!rb_typeddata_is_kind_of(self_instance, &thread_context_collector_typed_data)) return false;
// This should never fail if the above check passes
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
struct per_thread_context *thread_context = get_context_for(rb_thread_current(), state);
// If there was no previously-existing context for this thread, we won't allocate one (see safety). We keep a metric for
// how often this happens -- see on_gc_start.
if (thread_context == NULL) return false;
long cpu_time_at_start_ns = thread_context->gc_tracking.cpu_time_at_start_ns;
long wall_time_at_start_ns = thread_context->gc_tracking.wall_time_at_start_ns;
if (cpu_time_at_start_ns == INVALID_TIME && wall_time_at_start_ns == INVALID_TIME) {
// If this happened, it means that on_gc_start was either never called for the thread OR it was called but no thread
// context existed at the time. The former can be the result of a bug, but since we can't distinguish them, we just
// do nothing.
return false;
}
// Mark thread as no longer in GC
thread_context->gc_tracking.cpu_time_at_start_ns = INVALID_TIME;
thread_context->gc_tracking.wall_time_at_start_ns = INVALID_TIME;
// Here we record the wall-time second and in on_gc_start we record it first to try to avoid having wall-time be slightly < cpu-time
long cpu_time_at_finish_ns = cpu_time_now_ns(thread_context);
long wall_time_at_finish_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
// If our end timestamp is not OK, we bail out
if (wall_time_at_finish_ns == 0) return false;
long gc_cpu_time_elapsed_ns = cpu_time_at_finish_ns - cpu_time_at_start_ns;
long gc_wall_time_elapsed_ns = wall_time_at_finish_ns - wall_time_at_start_ns;
// Wall-time can go backwards if the system clock gets changed (and we observed spurious jumps back on macOS as well)
// so let's ensure we don't get negative values for time deltas.
gc_cpu_time_elapsed_ns = long_max_of(gc_cpu_time_elapsed_ns, 0);
gc_wall_time_elapsed_ns = long_max_of(gc_wall_time_elapsed_ns, 0);
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
state->gc_tracking.accumulated_cpu_time_ns = 0;
state->gc_tracking.accumulated_wall_time_ns = 0;
}
state->gc_tracking.accumulated_cpu_time_ns += gc_cpu_time_elapsed_ns;
state->gc_tracking.accumulated_wall_time_ns += gc_wall_time_elapsed_ns;
state->gc_tracking.wall_time_at_previous_gc_ns = wall_time_at_finish_ns;
// Update cpu-time accounting so it doesn't include the cpu-time spent in GC during the next sample
// We don't update the wall-time because we don't subtract the wall-time spent in GC (see call to
// `update_time_since_previous_sample` for wall-time in `update_metrics_and_sample`).
if (thread_context->cpu_time_at_previous_sample_ns != INVALID_TIME) {
thread_context->cpu_time_at_previous_sample_ns += gc_cpu_time_elapsed_ns;
}
// Let the caller know if it should schedule a flush or not. Returning true every time would cause a lot of overhead
// on the application (see GC tracking introduction at the top of the file), so instead we try to accumulate a few
// samples first.
bool over_flush_time_threshold =
(wall_time_at_finish_ns - state->gc_tracking.wall_time_at_last_flushed_gc_event_ns) >= TIME_BETWEEN_GC_EVENTS_NS;
if (over_flush_time_threshold) {
return true;
} else {
return gc_profiling_has_major_gc_finished();
}
}
// This function gets called after one or more GC work steps (calls to on_gc_start/on_gc_finish).
// It creates a new sample including the cpu and wall-time spent by the garbage collector work, and resets any
// GC-related tracking.
//
// Assumption 1: This function is called in a thread that is holding the Global VM Lock. Caller is responsible for enforcing this.
// Assumption 2: This function is allowed to raise exceptions. Caller is responsible for handling them, if needed.
// Assumption 3: Unlike `on_gc_start` and `on_gc_finish`, this method is allowed to allocate memory as needed.
// Assumption 4: This function is called from the main Ractor (if Ruby has support for Ractors).
VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
struct thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state);
if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
}
int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
ddog_prof_Label labels[max_labels_needed_for_gc];
uint8_t label_pos = gc_profiling_set_metadata(labels, max_labels_needed_for_gc);
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
// The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
int64_t end_timestamp_ns = 0;
if (state->timeline_enabled) {
end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, state->gc_tracking.wall_time_at_previous_gc_ns);
}
record_placeholder_stack(
state->recorder_instance,
(sample_values) {
// This event gets both a regular cpu/wall-time duration, as a normal cpu/wall-time sample would, as well as a
// timeline duration.
// This is done to enable two use-cases:
// * regular cpu/wall-time makes this event show up as a regular stack in the flamegraph
// * the timeline duration is used when the event shows up in the timeline
.cpu_time_ns = state->gc_tracking.accumulated_cpu_time_ns,
.cpu_or_wall_samples = 1,
.wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
.timeline_wall_time_ns = state->gc_tracking.accumulated_wall_time_ns,
},
(sample_labels) {.labels = slice_labels, .state_label = NULL, .end_timestamp_ns = end_timestamp_ns},
DDOG_CHARSLICE_C("Garbage Collection")
);
state->gc_tracking.wall_time_at_last_flushed_gc_event_ns = state->gc_tracking.wall_time_at_previous_gc_ns;
state->gc_tracking.wall_time_at_previous_gc_ns = INVALID_TIME;
state->stats.gc_samples++;
// Let recorder do any cleanup/updates it requires after a GC step.
recorder_after_gc_step(state->recorder_instance);
// Return a VALUE to make it easier to call this function from Ruby APIs that expect a return value (such as rb_rescue2)
return Qnil;
}
static void trigger_sample_for_thread(
struct thread_context_collector_state *state,
VALUE thread,
VALUE stack_from_thread, // This can be different when attributing profiler overhead using a different stack
struct per_thread_context *thread_context,
sampling_buffer* sampling_buffer,
sample_values values,
long current_monotonic_wall_time_ns,
// These two labels are only used for allocation profiling; @ivoanjo: may want to refactor this at some point?
ddog_CharSlice *ruby_vm_type,
ddog_CharSlice *class_name,
bool is_gvl_waiting_state,
// If the Ruby VM is at a state that can allocate objects safely, or not. Added for allocation profiling: we're not
// allowed to allocate objects (or raise exceptions) when inside the NEWOBJ tracepoint.
bool is_safe_to_allocate_objects
) {
int max_label_count =
1 + // thread id
1 + // thread name
1 + // profiler overhead
2 + // ruby vm type and allocation class
1 + // state (only set for cpu/wall-time samples)
2; // local root span id and span id
ddog_prof_Label labels[max_label_count];
int label_pos = 0;
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("thread id"),
.str = thread_context->thread_id_char_slice
};
VALUE thread_name = thread_name_for(thread);
if (thread_name != Qnil) {
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("thread name"),
.str = char_slice_from_ruby_string(thread_name)
};
} else if (thread == state->main_thread) { // Threads are often not named, but we can have a nice fallback for this special thread
ddog_CharSlice main_thread_name = DDOG_CHARSLICE_C("main");
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("thread name"),
.str = main_thread_name
};
} else {
// For other threads without name, we use the "invoke location" (first file:line of the block used to start the thread), if any.
// This is what Ruby shows in `Thread#to_s`.
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("thread name"),
.str = thread_context->thread_invoke_location_char_slice // This is an empty string if no invoke location was available
};
}
struct trace_identifiers trace_identifiers_result = {.valid = false, .trace_endpoint = Qnil};
trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
if (!trace_identifiers_result.valid && state->otel_context_enabled != OTEL_CONTEXT_ENABLED_FALSE) {
// If we couldn't get something with ddtrace, let's see if we can get some trace identifiers from opentelemetry directly
otel_without_ddtrace_trace_identifiers_for(state, thread, &trace_identifiers_result, is_safe_to_allocate_objects);
}
if (trace_identifiers_result.valid) {
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("local root span id"), .num = trace_identifiers_result.local_root_span_id};
labels[label_pos++] = (ddog_prof_Label) {.key = DDOG_CHARSLICE_C("span id"), .num = trace_identifiers_result.span_id};
if (trace_identifiers_result.trace_endpoint != Qnil) {
// The endpoint gets recorded in a different way because it is mutable in the tracer and can change during a
// trace.
//
// Instead of each sample for the same local_root_span_id getting a potentially-different endpoint,
// `record_endpoint` (via libdatadog) keeps a list of local_root_span_id values and their most-recently-seen
// endpoint values, and at serialization time the most-recently-seen endpoint is applied to all relevant samples.
//
// This is why the endpoint is not directly added in this function to the labels array, although it will later
// show up in the array in the output pprof.
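// (Illustrative example: if two samples share local_root_span_id 42 and the endpoint changes from "/users/:id" to
// "/users/:id/posts" between them, both samples end up tagged with "/users/:id/posts" in the serialized profile.)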
record_endpoint(
state->recorder_instance,
trace_identifiers_result.local_root_span_id,
char_slice_from_ruby_string(trace_identifiers_result.trace_endpoint)
);
}
}
if (thread != stack_from_thread) {
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("profiler overhead"),
.num = 1
};
}
if (ruby_vm_type != NULL) {
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("ruby vm type"),
.str = *ruby_vm_type
};
}
if (class_name != NULL) {
labels[label_pos++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("allocation class"),
.str = *class_name
};
}
// This label is handled specially:
// 1. It's only set for cpu/wall-time samples
// 2. We set it here to its default state of "unknown", but the `Collectors::Stack` may choose to override it with
// something more interesting.
ddog_prof_Label *state_label = NULL;
if (values.cpu_or_wall_samples > 0) {
state_label = &labels[label_pos++];
*state_label = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("state"),
.str = DDOG_CHARSLICE_C("unknown"),
.num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
};
}
// The number of times `label_pos++` shows up in this function needs to match `max_label_count`. To avoid "oops I
// forgot to update max_label_count" in the future, we've also added this validation.
// @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
// changes the code erroneously and remove it entirely?
if (label_pos > max_label_count) {
rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
}
ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
// The end_timestamp_ns is treated specially by libdatadog and that's why it's not added as a ddog_prof_Label
int64_t end_timestamp_ns = 0;
if (state->timeline_enabled && current_monotonic_wall_time_ns != INVALID_TIME) {
end_timestamp_ns = monotonic_to_system_epoch_ns(&state->time_converter_state, current_monotonic_wall_time_ns);
}
sample_thread(
stack_from_thread,
sampling_buffer,
state->recorder_instance,
values,
(sample_labels) {
.labels = slice_labels,
.state_label = state_label,
.end_timestamp_ns = end_timestamp_ns,
.is_gvl_waiting_state = is_gvl_waiting_state,
}
);
}
// This method exists only to enable testing Datadog::Profiling::Collectors::ThreadContext behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
VALUE result = rb_ary_new();
ddtrace_thread_list(result);
return result;
}
static struct per_thread_context *get_or_create_context_for(VALUE thread, struct thread_context_collector_state *state) {
struct per_thread_context* thread_context = NULL;
st_data_t value_context = 0;
if (st_lookup(state->hash_map_per_thread_context, (st_data_t) thread, &value_context)) {
thread_context = (struct per_thread_context*) value_context;
} else {
thread_context = ruby_xcalloc(1, sizeof(struct per_thread_context));
initialize_context(thread, thread_context, state);
st_insert(state->hash_map_per_thread_context, (st_data_t) thread, (st_data_t) thread_context);
}