Skip to content

Commit

Permalink
Merge branch 'master' into disable-env-logs
Browse files Browse the repository at this point in the history
  • Loading branch information
marcotc authored Dec 3, 2024
2 parents 59feaca + 7f079c9 commit f7780e2
Show file tree
Hide file tree
Showing 24 changed files with 943 additions and 301 deletions.
133 changes: 133 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Please do NOT manually edit this file.
# This file is generated by 'bundle exec rake github:actions:test_template'
---
name: Unit Tests
'on':
push:
branches:
- master
- poc/**
schedule:
- cron: 0 7 * * *
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: "${{ github.ref != 'refs/heads/master' }}"
jobs:
compute_tasks:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
engine:
- name: ruby
version: '3.3'
alias: ruby-33
- name: ruby
version: '3.2'
alias: ruby-32
container:
image: ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }}
outputs:
ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}"
ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}"
steps:
- uses: actions/checkout@v4
- run: bundle install
- id: set-matrix
run: |
matrix_json=$(bundle exec rake github:generate_matrix)
# Debug output
echo "Generated JSON:"
echo "$matrix_json"
# Set the output
echo "${{ matrix.engine.alias }}=$(echo "$matrix_json")" >> $GITHUB_OUTPUT
- run: bundle cache
- uses: actions/upload-artifact@v4
with:
name: bundled-dependencies-${{ github.run_id }}-${{ matrix.engine.alias }}
retention-days: 1
path: |
Gemfile.lock
vendor/
test-ruby-33:
name: 'ruby-3.3: ${{ matrix.task }} (${{ matrix.group }})'
needs:
- compute_tasks
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
include: "${{ fromJson(needs.compute_tasks.outputs.ruby-33-matrix) }}"
container:
image: ghcr.io/datadog/images-rb/engines/ruby:3.3
env:
TEST_POSTGRES_HOST: postgres
TEST_REDIS_HOST: redis
services:
postgres:
image: postgres:9.6
credentials:
username: "${{ secrets.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
env:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: postgres
redis:
image: redis:6.2
credentials:
username: "${{ secrets.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
steps:
- uses: actions/checkout@v4
- name: Configure Git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/download-artifact@v4
with:
name: bundled-dependencies-${{ github.run_id }}-ruby-33
- run: bundle install --local
- name: Test ${{ matrix.task }} with ${{ matrix.gemfile }}
env:
BUNDLE_GEMFILE: "${{ matrix.gemfile }}"
run: bundle install && bundle exec rake spec:${{ matrix.task }}
test-ruby-32:
name: 'ruby-3.2: ${{ matrix.task }} (${{ matrix.group }})'
needs:
- compute_tasks
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
include: "${{ fromJson(needs.compute_tasks.outputs.ruby-32-matrix) }}"
container:
image: ghcr.io/datadog/images-rb/engines/ruby:3.2
env:
TEST_POSTGRES_HOST: postgres
TEST_REDIS_HOST: redis
services:
postgres:
image: postgres:9.6
credentials:
username: "${{ secrets.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
env:
POSTGRES_PASSWORD: postgres
POSTGRES_USER: postgres
POSTGRES_DB: postgres
redis:
image: redis:6.2
credentials:
username: "${{ secrets.DOCKERHUB_USERNAME }}"
password: "${{ secrets.DOCKERHUB_TOKEN }}"
steps:
- uses: actions/checkout@v4
- name: Configure Git
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- uses: actions/download-artifact@v4
with:
name: bundled-dependencies-${{ github.run_id }}-ruby-32
- run: bundle install --local
- name: Test ${{ matrix.task }} with ${{ matrix.gemfile }}
env:
BUNDLE_GEMFILE: "${{ matrix.gemfile }}"
run: bundle install && bundle exec rake spec:${{ matrix.task }}
1 change: 1 addition & 0 deletions .gitlab/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ ddprof-benchmark:
benchmarks:
stage: benchmarks
when: always
needs: []
tags: ["runner:apm-k8s-tweaked-metal"]
image: $BASE_CI_IMAGE
interruptible: true
Expand Down
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"files.associations": {
"*.gemfile": "ruby",
"Matrixfile": "ruby",
"Dockerfile*": "dockerfile"
}
}
3 changes: 3 additions & 0 deletions Matrixfile
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@
'redis-4' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
'redis-5' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby'
},
'appsec:active_record' => {
'relational_db' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
},
'appsec:rack' => {
'rack-latest' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
'rack-3' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby',
Expand Down
3 changes: 2 additions & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ namespace :spec do
end

namespace :appsec do
task all: [:main, :rack, :rails, :sinatra, :devise, :graphql]
task all: [:main, :active_record, :rack, :rails, :sinatra, :devise, :graphql]

# Datadog AppSec main specs
desc '' # "Explicitly hiding from `rake -T`"
Expand All @@ -280,6 +280,7 @@ namespace :spec do

# Datadog AppSec integrations
[
:active_record,
:rack,
:sinatra,
:rails,
Expand Down
8 changes: 0 additions & 8 deletions ext/datadog_profiling_native_extension/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,6 @@ def skip_building_extension!(reason)
# On older Rubies, there was no jit_return member on the rb_control_frame_t struct
$defs << "-DNO_JIT_RETURN" if RUBY_VERSION < "3.1"

# On older Rubies, rb_gc_force_recycle allowed to free objects in a way that
# would be invisible to free tracepoints, finalizers and without cleaning
# obj_to_id_tbl mappings.
$defs << "-DHAVE_WORKING_RB_GC_FORCE_RECYCLE" if RUBY_VERSION < "3.1"

# On older Rubies, there are no Ractors
$defs << "-DNO_RACTORS" if RUBY_VERSION < "3"

Expand All @@ -184,9 +179,6 @@ def skip_building_extension!(reason)
# On older Rubies, objects would not move
$defs << "-DNO_T_MOVED" if RUBY_VERSION < "2.7"

# On older Rubies, there was no RUBY_SEEN_OBJ_ID flag
$defs << "-DNO_SEEN_OBJ_ID_FLAG" if RUBY_VERSION < "2.7"

# On older Rubies, rb_global_vm_lock_struct did not include the owner field
$defs << "-DNO_GVL_OWNER" if RUBY_VERSION < "2.6"

Expand Down
96 changes: 7 additions & 89 deletions ext/datadog_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@
#include "libdatadog_helpers.h"
#include "time_helpers.h"

#if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG))
#define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
#endif

// Minimum age (in GC generations) of heap objects we want to include in heap
// recorder iterations. Object with age 0 represent objects that have yet to undergo
// a GC and, thus, may just be noise/trash at instant of iteration and are usually not
Expand Down Expand Up @@ -123,9 +119,6 @@ typedef struct {
// Pointer to the (potentially partial) object_record containing metadata about an ongoing recording.
// When NULL, this symbolizes an unstarted/invalid recording.
object_record *object_record;
// A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object
// as part of our workaround around rb_gc_force_recycle issues.
bool did_recycle_workaround;
} recording;

struct heap_recorder {
Expand Down Expand Up @@ -342,46 +335,12 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
}

bool did_recycle_workaround = false;

#ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
// If we are in a ruby version that has a working rb_gc_force_recycle implementation,
// its usage may lead to an object being re-used outside of the typical GC cycle.
//
// This re-use is in theory invisible to us unless we're lucky enough to sample both
// the original object and the replacement that uses the recycled slot.
//
// In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366)
// that non-noop implementations of rb_gc_force_recycle have an implementation bug
// which results in the object that re-used the recycled slot inheriting the same
// object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to
// "observe" implicit frees when an object we are tracking is force-recycled.
//
// However, it may happen that we start tracking a new object and that object was
// allocated on a recycled slot. Due to the bug, this object would be missing the
// FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be,
// when we're doing our liveness check, the absence of the flag would trigger our
// implicit free workaround and the object would be inferred as recycled even though
// it might still be alive.
//
// Thus, if we detect that this new allocation is already missing the flag at the start
// of the heap allocation recording, we force-set it. This should be safe since we
// just called rb_obj_id on it above and the expectation is that any flaggable object
// that goes through it ends up with the flag set (as evidenced by the GC_ASSERT
// lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14).
if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) {
RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID);
did_recycle_workaround = true;
}
#endif

heap_recorder->active_recording = (recording) {
.object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
.weight = weight * heap_recorder->sample_rate,
.class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
.alloc_gen = rb_gc_count(),
}),
.did_recycle_workaround = did_recycle_workaround,
}),
};
}

Expand Down Expand Up @@ -685,41 +644,6 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext

// If we got this far, then we found a valid live object for the tracked id.

#ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND
// If we are in a ruby version that has a working rb_gc_force_recycle implementation,
// its usage may lead to an object being re-used outside of the typical GC cycle.
//
// This re-use is in theory invisible to us and would mean that the ref from which we
// collected the object_record metadata may not be the same as the current ref and
// thus any further reporting would be innacurately attributed to stale metadata.
//
// In practice, there is a way for us to notice that this happened because of a bug
// in the implementation of rb_gc_force_recycle. Our heap profiler relies on object
// ids and id2ref to detect whether objects are still alive. Turns out that when an
// object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID
// of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment
// in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517
//
// Thus, if we detect that the ref we just resolved above is missing this flag, we can
// safely say re-use happened and thus treat it as an implicit free of the object
// we were tracking (the original one which got recycled).
if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) {

// NOTE: We don't really need to set this flag for heap recorder to work correctly
// but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle
// which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj
// tables in objspace. We already do the same thing when we sample a recycled object,
// here we apply it as well to objects that replace recycled objects that were being
// tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366
RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID);

on_committed_object_record_cleanup(recorder, record);
recorder->stats_last_update.objects_dead++;
return ST_DELETE;
}

#endif

if (
recorder->size_enabled &&
recorder->update_include_old && // We only update sizes when doing a full update
Expand Down Expand Up @@ -803,18 +727,12 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *
object_record *new_object_record = recording.object_record;
if (existing) {
object_record *existing_record = (object_record*) (*value);
if (recording.did_recycle_workaround) {
// In this case, it's possible for an object id to be re-used and we were lucky enough to have
// sampled both the original object and the replacement so cleanup the old one and replace it with
// the new object_record (i.e. treat this as a combined free+allocation).
on_committed_object_record_cleanup(update_data->heap_recorder, existing_record);
} else {
// This is not supposed to happen, raising...
VALUE existing_inspect = object_record_inspect(existing_record);
VALUE new_inspect = object_record_inspect(new_object_record);
rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
"the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
}

// This is not supposed to happen, raising...
VALUE existing_inspect = object_record_inspect(existing_record);
VALUE new_inspect = object_record_inspect(new_object_record);
rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with "
"the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect);
}
// Always carry on with the update, we want the new record to be there at the end
(*value) = (st_data_t) new_object_record;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ bool is_current_thread_holding_the_gvl(void) {
//
// Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`.
//
// For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 by default, and we print a
// For this reason we default to use the "no signals workaround" on Ruby 2.5 by default, and we print a
// warning when customers force-enable it.
bool gvl_acquired = vm->gvl.acquired != 0;
rb_thread_t *current_owner = vm->running_thread;
Expand Down
34 changes: 0 additions & 34 deletions ext/datadog_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,6 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio
static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj);
static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj);
static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance);
static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns);
static VALUE _native_is_object_recorded(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE object_id);
Expand Down Expand Up @@ -297,10 +295,6 @@ void stack_recorder_init(VALUE profiling_module) {
_native_end_fake_slow_heap_serialization, 1);
rb_define_singleton_method(testing_module, "_native_debug_heap_recorder",
_native_debug_heap_recorder, 1);
rb_define_singleton_method(testing_module, "_native_gc_force_recycle",
_native_gc_force_recycle, 1);
rb_define_singleton_method(testing_module, "_native_has_seen_id_flag",
_native_has_seen_id_flag, 1);
rb_define_singleton_method(testing_module, "_native_is_object_recorded?", _native_is_object_recorded, 2);
rb_define_singleton_method(testing_module, "_native_heap_recorder_reset_last_update", _native_heap_recorder_reset_last_update, 1);
rb_define_singleton_method(testing_module, "_native_recorder_after_gc_step", _native_recorder_after_gc_step, 1);
Expand Down Expand Up @@ -1006,34 +1000,6 @@ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recor
return heap_recorder_testonly_debug(state->heap_recorder);
}

#pragma GCC diagnostic push
// rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop.
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#pragma GCC diagnostic ignored "-Wunused-parameter"
// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) {
#ifdef HAVE_WORKING_RB_GC_FORCE_RECYCLE
rb_gc_force_recycle(obj);
#endif
return Qnil;
}
#pragma GCC diagnostic pop

// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec.
// It SHOULD NOT be used for other purposes.
static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) {
#ifndef NO_SEEN_OBJ_ID_FLAG
if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) {
return Qtrue;
} else {
return Qfalse;
}
#else
return Qfalse;
#endif
}

static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
Expand Down
Loading

0 comments on commit f7780e2

Please sign in to comment.