diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000000..a17b1ddb2a8 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,133 @@ +# Please do NOT manually edit this file. +# This file is generated by 'bundle exec rake github:actions:test_template' +--- +name: Unit Tests +'on': + push: + branches: + - master + - poc/** + schedule: + - cron: 0 7 * * * +concurrency: + group: "${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: "${{ github.ref != 'refs/heads/master' }}" +jobs: + compute_tasks: + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + engine: + - name: ruby + version: '3.3' + alias: ruby-33 + - name: ruby + version: '3.2' + alias: ruby-32 + container: + image: ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }} + outputs: + ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}" + ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}" + steps: + - uses: actions/checkout@v4 + - run: bundle install + - id: set-matrix + run: | + matrix_json=$(bundle exec rake github:generate_matrix) + # Debug output + echo "Generated JSON:" + echo "$matrix_json" + # Set the output + echo "${{ matrix.engine.alias }}=$(echo "$matrix_json")" >> $GITHUB_OUTPUT + - run: bundle cache + - uses: actions/upload-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-${{ matrix.engine.alias }} + retention-days: 1 + path: | + Gemfile.lock + vendor/ + test-ruby-33: + name: 'ruby-3.3: ${{ matrix.task }} (${{ matrix.group }})' + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-33-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.3 + env: + TEST_POSTGRES_HOST: postgres + TEST_REDIS_HOST: redis + services: + postgres: + image: postgres:9.6 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ 
secrets.DOCKERHUB_TOKEN }}" + env: + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_DB: postgres + redis: + image: redis:6.2 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + steps: + - uses: actions/checkout@v4 + - name: Configure Git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-33 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} + test-ruby-32: + name: 'ruby-3.2: ${{ matrix.task }} (${{ matrix.group }})' + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-32-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.2 + env: + TEST_POSTGRES_HOST: postgres + TEST_REDIS_HOST: redis + services: + postgres: + image: postgres:9.6 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + env: + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_DB: postgres + redis: + image: redis:6.2 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + steps: + - uses: actions/checkout@v4 + - name: Configure Git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-32 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index ec9a291a5c0..2ced6f01622 100644 --- 
a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -162,6 +162,7 @@ ddprof-benchmark: benchmarks: stage: benchmarks when: always + needs: [] tags: ["runner:apm-k8s-tweaked-metal"] image: $BASE_CI_IMAGE interruptible: true diff --git a/.vscode/settings.json b/.vscode/settings.json index 605e69ae1d0..81ce33a596e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,7 @@ { "files.associations": { "*.gemfile": "ruby", + "Matrixfile": "ruby", "Dockerfile*": "dockerfile" } } diff --git a/Matrixfile b/Matrixfile index 304a1a3a48e..2fa275e5f14 100644 --- a/Matrixfile +++ b/Matrixfile @@ -258,6 +258,9 @@ 'redis-4' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'redis-5' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby' }, + 'appsec:active_record' => { + 'relational_db' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', + }, 'appsec:rack' => { 'rack-latest' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'rack-3' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', diff --git a/Rakefile b/Rakefile index 41426800e51..5ddc8e14b16 100644 --- a/Rakefile +++ b/Rakefile @@ -267,7 +267,7 @@ namespace :spec do end namespace :appsec do - task all: [:main, :rack, :rails, :sinatra, :devise, :graphql] + task all: [:main, :active_record, :rack, :rails, :sinatra, :devise, :graphql] # Datadog AppSec main specs desc '' # "Explicitly hiding from `rake -T`" @@ -280,6 +280,7 @@ namespace :spec do # Datadog AppSec integrations [ + :active_record, :rack, :sinatra, :rails, diff --git a/ext/datadog_profiling_native_extension/extconf.rb b/ext/datadog_profiling_native_extension/extconf.rb index 4acadd68215..bac21c65b69 100644 --- a/ext/datadog_profiling_native_extension/extconf.rb +++ b/ext/datadog_profiling_native_extension/extconf.rb @@ -170,11 +170,6 @@ def skip_building_extension!(reason) # On older Rubies, there was no jit_return 
member on the rb_control_frame_t struct $defs << "-DNO_JIT_RETURN" if RUBY_VERSION < "3.1" -# On older Rubies, rb_gc_force_recycle allowed to free objects in a way that -# would be invisible to free tracepoints, finalizers and without cleaning -# obj_to_id_tbl mappings. -$defs << "-DHAVE_WORKING_RB_GC_FORCE_RECYCLE" if RUBY_VERSION < "3.1" - # On older Rubies, there are no Ractors $defs << "-DNO_RACTORS" if RUBY_VERSION < "3" @@ -184,9 +179,6 @@ def skip_building_extension!(reason) # On older Rubies, objects would not move $defs << "-DNO_T_MOVED" if RUBY_VERSION < "2.7" -# On older Rubies, there was no RUBY_SEEN_OBJ_ID flag -$defs << "-DNO_SEEN_OBJ_ID_FLAG" if RUBY_VERSION < "2.7" - # On older Rubies, rb_global_vm_lock_struct did not include the owner field $defs << "-DNO_GVL_OWNER" if RUBY_VERSION < "2.6" diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index ea1c2ee5e4c..d186a1a0fba 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -7,10 +7,6 @@ #include "libdatadog_helpers.h" #include "time_helpers.h" -#if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG)) - #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND -#endif - // Minimum age (in GC generations) of heap objects we want to include in heap // recorder iterations. Object with age 0 represent objects that have yet to undergo // a GC and, thus, may just be noise/trash at instant of iteration and are usually not @@ -123,9 +119,6 @@ typedef struct { // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording. // When NULL, this symbolizes an unstarted/invalid recording. object_record *object_record; - // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object - // as part of our workaround around rb_gc_force_recycle issues. 
- bool did_recycle_workaround; } recording; struct heap_recorder { @@ -342,46 +335,12 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); } - bool did_recycle_workaround = false; - - #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND - // If we are in a ruby version that has a working rb_gc_force_recycle implementation, - // its usage may lead to an object being re-used outside of the typical GC cycle. - // - // This re-use is in theory invisible to us unless we're lucky enough to sample both - // the original object and the replacement that uses the recycled slot. - // - // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366) - // that non-noop implementations of rb_gc_force_recycle have an implementation bug - // which results in the object that re-used the recycled slot inheriting the same - // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to - // "observe" implicit frees when an object we are tracking is force-recycled. - // - // However, it may happen that we start tracking a new object and that object was - // allocated on a recycled slot. Due to the bug, this object would be missing the - // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be, - // when we're doing our liveness check, the absence of the flag would trigger our - // implicit free workaround and the object would be inferred as recycled even though - // it might still be alive. - // - // Thus, if we detect that this new allocation is already missing the flag at the start - // of the heap allocation recording, we force-set it. 
This should be safe since we - // just called rb_obj_id on it above and the expectation is that any flaggable object - // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT - // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14). - if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) { - RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID); - did_recycle_workaround = true; - } - #endif - heap_recorder->active_recording = (recording) { .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) { .weight = weight * heap_recorder->sample_rate, .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL, .alloc_gen = rb_gc_count(), - }), - .did_recycle_workaround = did_recycle_workaround, + }), }; } @@ -685,41 +644,6 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext // If we got this far, then we found a valid live object for the tracked id. - #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND - // If we are in a ruby version that has a working rb_gc_force_recycle implementation, - // its usage may lead to an object being re-used outside of the typical GC cycle. - // - // This re-use is in theory invisible to us and would mean that the ref from which we - // collected the object_record metadata may not be the same as the current ref and - // thus any further reporting would be innacurately attributed to stale metadata. - // - // In practice, there is a way for us to notice that this happened because of a bug - // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object - // ids and id2ref to detect whether objects are still alive. 
Turns out that when an - // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID - // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment - // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517 - // - // Thus, if we detect that the ref we just resolved above is missing this flag, we can - // safely say re-use happened and thus treat it as an implicit free of the object - // we were tracking (the original one which got recycled). - if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) { - - // NOTE: We don't really need to set this flag for heap recorder to work correctly - // but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle - // which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj - // tables in objspace. We already do the same thing when we sample a recycled object, - // here we apply it as well to objects that replace recycled objects that were being - // tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366 - RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID); - - on_committed_object_record_cleanup(recorder, record); - recorder->stats_last_update.objects_dead++; - return ST_DELETE; - } - - #endif - if ( recorder->size_enabled && recorder->update_include_old && // We only update sizes when doing a full update @@ -803,18 +727,12 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t * object_record *new_object_record = recording.object_record; if (existing) { object_record *existing_record = (object_record*) (*value); - if (recording.did_recycle_workaround) { - // In this case, it's possible for an object id to be re-used and we were lucky enough to have - // sampled both the original object and the replacement so cleanup the old one and replace it with - // the new object_record (i.e. treat this as a combined free+allocation). 
- on_committed_object_record_cleanup(update_data->heap_recorder, existing_record); - } else { - // This is not supposed to happen, raising... - VALUE existing_inspect = object_record_inspect(existing_record); - VALUE new_inspect = object_record_inspect(new_object_record); - rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " - "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect); - } + + // This is not supposed to happen, raising... + VALUE existing_inspect = object_record_inspect(existing_record); + VALUE new_inspect = object_record_inspect(new_object_record); + rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " + "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect); } // Always carry on with the update, we want the new record to be there at the end (*value) = (st_data_t) new_object_record; diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index dbbebd531d0..59e5c644882 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -158,7 +158,7 @@ bool is_current_thread_holding_the_gvl(void) { // // Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`. // - // For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 by default, and we print a + // For this reason we use the "no signals workaround" by default on Ruby 2.5, and we print a //
bool gvl_acquired = vm->gvl.acquired != 0; rb_thread_t *current_owner = vm->running_thread; diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index 31165ba8b3b..710b17356e2 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -258,8 +258,6 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); -static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj); -static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj); static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance); static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns); static VALUE _native_is_object_recorded(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE object_id); @@ -297,10 +295,6 @@ void stack_recorder_init(VALUE profiling_module) { _native_end_fake_slow_heap_serialization, 1); rb_define_singleton_method(testing_module, "_native_debug_heap_recorder", _native_debug_heap_recorder, 1); - rb_define_singleton_method(testing_module, "_native_gc_force_recycle", - _native_gc_force_recycle, 1); - rb_define_singleton_method(testing_module, "_native_has_seen_id_flag", - _native_has_seen_id_flag, 1); rb_define_singleton_method(testing_module, "_native_is_object_recorded?", _native_is_object_recorded, 2); rb_define_singleton_method(testing_module, "_native_heap_recorder_reset_last_update", _native_heap_recorder_reset_last_update, 1); rb_define_singleton_method(testing_module, 
"_native_recorder_after_gc_step", _native_recorder_after_gc_step, 1); @@ -1006,34 +1000,6 @@ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recor return heap_recorder_testonly_debug(state->heap_recorder); } -#pragma GCC diagnostic push -// rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop. -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#pragma GCC diagnostic ignored "-Wunused-parameter" -// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. -// It SHOULD NOT be used for other purposes. -static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) { - #ifdef HAVE_WORKING_RB_GC_FORCE_RECYCLE - rb_gc_force_recycle(obj); - #endif - return Qnil; -} -#pragma GCC diagnostic pop - -// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. -// It SHOULD NOT be used for other purposes. -static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) { - #ifndef NO_SEEN_OBJ_ID_FLAG - if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) { - return Qtrue; - } else { - return Qfalse; - } - #else - return Qfalse; - #endif -} - static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); diff --git a/lib/datadog/appsec.rb b/lib/datadog/appsec.rb index 940bbc4b4b6..5f96dbdd5fa 100644 --- a/lib/datadog/appsec.rb +++ b/lib/datadog/appsec.rb @@ -56,6 +56,7 @@ def components require_relative 'appsec/contrib/rack/integration' require_relative 'appsec/contrib/sinatra/integration' require_relative 'appsec/contrib/rails/integration' +require_relative 'appsec/contrib/active_record/integration' require_relative 'appsec/contrib/devise/integration' require_relative 'appsec/contrib/graphql/integration' diff --git 
a/lib/datadog/appsec/contrib/active_record/instrumentation.rb b/lib/datadog/appsec/contrib/active_record/instrumentation.rb new file mode 100644 index 00000000000..3b226a9aafb --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/instrumentation.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec module that will be prepended to ActiveRecord adapter + module Instrumentation + module_function + + def detect_sql_injection(sql, adapter_name) + scope = AppSec.active_scope + return unless scope + + # libddwaf expects db system to be lowercase, + # in case of sqlite adapter, libddwaf expects 'sqlite' as db system + db_system = adapter_name.downcase + db_system = 'sqlite' if db_system == 'sqlite3' + + ephemeral_data = { + 'server.db.statement' => sql, + 'server.db.system' => db_system + } + + waf_timeout = Datadog.configuration.appsec.waf_timeout + result = scope.processor_context.run({}, ephemeral_data, waf_timeout) + + if result.status == :match + Datadog::AppSec::Event.tag_and_keep!(scope, result) + + event = { + waf_result: result, + trace: scope.trace, + span: scope.service_entry_span, + sql: sql, + actions: result.actions + } + scope.processor_context.events << event + end + end + + # patch for all adapters in ActiveRecord >= 7.1 + module InternalExecQueryAdapterPatch + def internal_exec_query(sql, *args, **rest) + Instrumentation.detect_sql_injection(sql, adapter_name) + + super + end + end + + # patch for postgres adapter in ActiveRecord < 7.1 + module ExecuteAndClearAdapterPatch + def execute_and_clear(sql, *args, **rest) + Instrumentation.detect_sql_injection(sql, adapter_name) + + super + end + end + + # patch for mysql2 and sqlite3 adapters in ActiveRecord < 7.1 + # this patch is also used when using JDBC adapter + module ExecQueryAdapterPatch + def exec_query(sql, *args, **rest) + Instrumentation.detect_sql_injection(sql, adapter_name) + + super + end + end + end + end + 
end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/integration.rb b/lib/datadog/appsec/contrib/active_record/integration.rb new file mode 100644 index 00000000000..00002f491d8 --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/integration.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require_relative '../integration' +require_relative 'patcher' + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # This class provides helper methods that are used when patching ActiveRecord + class Integration + include Datadog::AppSec::Contrib::Integration + + MINIMUM_VERSION = Gem::Version.new('4') + + register_as :active_record, auto_patch: false + + # Version of the loaded activerecord gem, or nil when no such gem spec is loaded + def self.version + Gem.loaded_specs['activerecord'] && Gem.loaded_specs['activerecord'].version + end + + # Whether the ::ActiveRecord constant is defined + def self.loaded? + !defined?(::ActiveRecord).nil? + end + + # `version` is nil when no activerecord gem spec is loaded, so it is checked before the + # comparison; otherwise calling >= on nil would raise NoMethodError + def self.compatible? + super && !version.nil? && version >= MINIMUM_VERSION + end + + # Always true for this integration + def self.auto_instrument? + true + end + + # Returns the Patcher module of this integration + def patcher + Patcher + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/patcher.rb b/lib/datadog/appsec/contrib/active_record/patcher.rb new file mode 100644 index 00000000000..dd0c6c220ae --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/patcher.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative '../patcher' +require_relative 'instrumentation' + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec patcher module for ActiveRecord + module Patcher + include Datadog::AppSec::Contrib::Patcher + + module_function + + def patched?
+ Patcher.instance_variable_get(:@patched) + end + + def target_version + Integration.version + end + + def patch + ActiveSupport.on_load :active_record do + instrumentation_module = if ::ActiveRecord.gem_version >= Gem::Version.new('7.1') + Instrumentation::InternalExecQueryAdapterPatch + else + Instrumentation::ExecQueryAdapterPatch + end + + if defined?(::ActiveRecord::ConnectionAdapters::SQLite3Adapter) + ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(instrumentation_module) + end + + if defined?(::ActiveRecord::ConnectionAdapters::Mysql2Adapter) + ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(instrumentation_module) + end + + if defined?(::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) + unless defined?(::ActiveRecord::ConnectionAdapters::JdbcAdapter) + instrumentation_module = Instrumentation::ExecuteAndClearAdapterPatch + end + + ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(instrumentation_module) + end + end + end + end + end + end + end +end diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 19239a70c78..86f8fb798df 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -207,28 +207,16 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) return false unless heap_profiling_enabled - if RUBY_VERSION.start_with?("2.") && RUBY_VERSION < "2.7" + if RUBY_VERSION < "3.1" Datadog.logger.warn( - "Heap profiling currently relies on features introduced in Ruby 2.7 and will be forcibly disabled. " \ - "Please upgrade to Ruby >= 2.7 in order to use this feature." + "Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM limitations. " \ + "Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled." 
) return false end - if RUBY_VERSION < "3.1" - Datadog.logger.debug( - "Current Ruby version (#{RUBY_VERSION}) supports forced object recycling which has a bug that the " \ - "heap profiler is forced to work around to remain accurate. This workaround requires force-setting " \ - "the SEEN_OBJ_ID flag on objects that should have it but don't. Full details can be found in " \ - "https://github.com/DataDog/dd-trace-rb/pull/3360. This workaround should be safe but can be " \ - "bypassed by disabling the heap profiler or upgrading to Ruby >= 3.1 where forced object recycling " \ - "was completely removed (https://bugs.ruby-lang.org/issues/18290)." - ) - end - unless allocation_profiling_enabled - raise ArgumentError, - "Heap profiling requires allocation profiling to be enabled" + raise ArgumentError, "Heap profiling requires allocation profiling to be enabled" end Datadog.logger.warn( diff --git a/sig/datadog/appsec/contrib/active_record/instrumentation.rbs b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs new file mode 100644 index 00000000000..daea0a9f684 --- /dev/null +++ b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs @@ -0,0 +1,23 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + module Instrumentation + def self?.detect_sql_injection: (String sql, String adapter_name) -> void + + module InternalExecQueryAdapterPatch + def internal_exec_query: (String sql, *untyped args, **untyped rest) -> untyped + end + + module ExecuteAndClearAdapterPatch + def execute_and_clear: (String sql, *untyped args, **untyped rest) -> untyped + end + + module ExecQueryAdapterPatch + def exec_query: (String sql, *untyped args, **untyped rest) -> untyped + end + end + end + end + end +end diff --git a/sig/datadog/appsec/contrib/active_record/integration.rbs b/sig/datadog/appsec/contrib/active_record/integration.rbs new file mode 100644 index 00000000000..781f186efc1 --- /dev/null +++ 
b/sig/datadog/appsec/contrib/active_record/integration.rbs @@ -0,0 +1,23 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + class Integration + include Datadog::AppSec::Contrib::Integration + + MINIMUM_VERSION: Gem::Version + + def self.version: () -> Gem::Version? + + def self.loaded?: () -> bool + + def self.compatible?: () -> bool + + def self.auto_instrument?: () -> true + + def patcher: () -> class + end + end + end + end +end diff --git a/sig/datadog/appsec/contrib/active_record/patcher.rbs b/sig/datadog/appsec/contrib/active_record/patcher.rbs new file mode 100644 index 00000000000..2850b974c00 --- /dev/null +++ b/sig/datadog/appsec/contrib/active_record/patcher.rbs @@ -0,0 +1,17 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + module Patcher + include Datadog::AppSec::Contrib::Patcher + + def self?.patched?: () -> bool + + def self?.target_version: () -> Gem::Version? + + def self?.patch: () -> void + end + end + end + end +end diff --git a/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb new file mode 100644 index 00000000000..6c5777adc1f --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +if PlatformHelpers.jruby? 
+ require 'activerecord-jdbc-adapter' +else + require 'mysql2' +end + +RSpec.describe 'AppSec ActiveRecord integration for Mysql2 adapter' do + let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } + let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } + let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } + let(:context) { processor.new_context } + + let(:span) { Datadog::Tracing::SpanOperation.new('root') } + let(:trace) { Datadog::Tracing::TraceOperation.new } + + let!(:user_class) do + stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| + klass.establish_connection(db_config) + + klass.connection.create_table 'users', force: :cascade do |t| + t.string :name, null: false + t.string :email, null: false + t.timestamps + end + + # prevent internal sql requests from showing up + klass.count + klass.first + end + end + + let(:db_config) do + { + adapter: 'mysql2', + database: ENV.fetch('TEST_MYSQL_DB', 'mysql'), + host: ENV.fetch('TEST_MYSQL_HOST', '127.0.0.1'), + password: ENV.fetch('TEST_MYSQL_ROOT_PASSWORD', 'root'), + port: ENV.fetch('TEST_MYSQL_PORT', '3306') + } + end + + before do + Datadog.configure do |c| + c.appsec.enabled = true + c.appsec.instrument :active_record + end + + Datadog::AppSec::Scope.activate_scope(trace, span, processor) + + raise_on_rails_deprecation! + end + + after do + Datadog.configuration.reset! 
+ + Datadog::AppSec::Scope.deactivate_scope + processor.finalize + end + + it 'calls waf with correct arguments when querying using .where' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT `users`.* FROM `users` WHERE `users`.`name` = 'Bob'", + 'server.db.system' => 'mysql2' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.where(name: 'Bob').to_a + end + + it 'calls waf with correct arguments when querying using .find_by_sql' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'", + 'server.db.system' => 'mysql2' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a + end + + it 'adds an event to processor context if waf status is :match' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {})) + ) + + expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original + + User.where(name: 'Bob').to_a + end +end diff --git a/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb new file mode 100644 index 00000000000..5ebd8e0cacb --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +if PlatformHelpers.jruby? 
# Integration spec: verifies that ActiveRecord queries issued through the
# PostgreSQL adapter reach the AppSec WAF context with the SQL statement and
# the database system name.
RSpec.describe 'AppSec ActiveRecord integration for Postgresql adapter' do
  # Telemetry is a verified double; the processor is built from the
  # `:recommended` ruleset.
  let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) }
  let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) }
  let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) }
  # NOTE(review): not referenced by any example in this describe block.
  let(:context) { processor.new_context }

  let(:span) { Datadog::Tracing::SpanOperation.new('root') }
  let(:trace) { Datadog::Tracing::TraceOperation.new }

  # `let!` is evaluated eagerly, so the `User` constant, its connection and the
  # `users` table exist before each example body runs.
  let!(:user_class) do
    stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass|
      klass.establish_connection(db_config)

      # `force: :cascade` recreates the table on every run.
      klass.connection.create_table 'users', force: :cascade do |t|
        t.string :name, null: false
        t.string :email, null: false
        t.timestamps
      end

      # prevent internal sql requests from showing up
      # (presumably warms up adapter/schema queries before the WAF
      # expectations are installed -- TODO confirm)
      klass.count
      klass.first
    end
  end

  # Connection settings; every value except the adapter can be overridden
  # through the corresponding TEST_POSTGRES_* environment variable.
  let(:db_config) do
    {
      adapter: 'postgresql',
      database: ENV.fetch('TEST_POSTGRES_DB', 'postgres'),
      host: ENV.fetch('TEST_POSTGRES_HOST', '127.0.0.1'),
      port: ENV.fetch('TEST_POSTGRES_PORT', 5432),
      username: ENV.fetch('TEST_POSTGRES_USER', 'postgres'),
      password: ENV.fetch('TEST_POSTGRES_PASSWORD', 'postgres')
    }
  end

  # Enable AppSec with the ActiveRecord instrumentation, then activate a scope
  # so that `Datadog::AppSec.active_scope` is available inside the examples.
  before do
    Datadog.configure do |c|
      c.appsec.enabled = true
      c.appsec.instrument :active_record
    end

    Datadog::AppSec::Scope.activate_scope(trace, span, processor)

    raise_on_rails_deprecation!
  end

  # Undo the global state touched in `before`: configuration, active scope and
  # the processor itself.
  after do
    Datadog.configuration.reset!

    Datadog::AppSec::Scope.deactivate_scope
    processor.finalize
  end

  it 'calls waf with correct arguments when querying using .where' do
    # The expected bind placeholder differs by platform: `?` on JRuby,
    # `$1` otherwise.
    expected_db_statement = if PlatformHelpers.jruby?
                              'SELECT "users".* FROM "users" WHERE "users"."name" = ?'
                            else
                              'SELECT "users".* FROM "users" WHERE "users"."name" = $1'
                            end

    # `run` must receive an empty first hash, the statement/system hash and
    # the configured WAF timeout; the real implementation is still invoked.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).with(
        {},
        {
          'server.db.statement' => expected_db_statement,
          'server.db.system' => 'postgresql'
        },
        Datadog.configuration.appsec.waf_timeout
      ).and_call_original
    )

    User.where(name: 'Bob').to_a
  end

  it 'calls waf with correct arguments when querying using .find_by_sql' do
    # Raw SQL is expected to reach the WAF exactly as written.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).with(
        {},
        {
          'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'",
          'server.db.system' => 'postgresql'
        },
        Datadog.configuration.appsec.waf_timeout
      ).and_call_original
    )

    User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a
  end

  it 'adds an event to processor context if waf status is :match' do
    # Stub the WAF to report a match so the event-recording path is exercised.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {}))
    )

    expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original

    User.where(name: 'Bob').to_a
  end
end
# Integration spec: verifies that ActiveRecord queries issued through the
# SQLite3 adapter reach the AppSec WAF context with the SQL statement and the
# database system name.
RSpec.describe 'AppSec ActiveRecord integration for SQLite3 adapter' do
  # Telemetry is a verified double; the processor is built from the
  # `:recommended` ruleset.
  let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) }
  let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) }
  let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) }
  # NOTE(review): not referenced by any example in this describe block.
  let(:context) { processor.new_context }

  let(:span) { Datadog::Tracing::SpanOperation.new('root') }
  let(:trace) { Datadog::Tracing::TraceOperation.new }

  # `let!` is evaluated eagerly, so the `User` constant, its connection and the
  # `users` table exist before each example body runs.
  let!(:user_class) do
    stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass|
      klass.establish_connection(db_config)

      # `force: :cascade` recreates the table on every run.
      klass.connection.create_table 'users', force: :cascade do |t|
        t.string :name, null: false
        t.string :email, null: false
        t.timestamps
      end

      # prevent internal sql requests from showing up
      # (presumably warms up adapter/schema queries before the WAF
      # expectations are installed -- TODO confirm)
      klass.count
      klass.first
    end
  end

  # In-memory database: no external service or environment variable is needed.
  let(:db_config) do
    {
      adapter: 'sqlite3',
      database: ':memory:'
    }
  end

  # Enable AppSec with the ActiveRecord instrumentation, then activate a scope
  # so that `Datadog::AppSec.active_scope` is available inside the examples.
  before do
    Datadog.configure do |c|
      c.appsec.enabled = true
      c.appsec.instrument :active_record
    end

    Datadog::AppSec::Scope.activate_scope(trace, span, processor)

    raise_on_rails_deprecation!
  end

  # Undo the global state touched in `before`: configuration, active scope and
  # the processor itself.
  after do
    Datadog.configuration.reset!

    Datadog::AppSec::Scope.deactivate_scope
    processor.finalize
  end

  it 'calls waf with correct arguments when querying using .where' do
    # `run` must receive an empty first hash, the statement/system hash and
    # the configured WAF timeout; the real implementation is still invoked.
    # Note the reported system name is 'sqlite', not the adapter name 'sqlite3'.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).with(
        {},
        {
          'server.db.statement' => 'SELECT "users".* FROM "users" WHERE "users"."name" = ?',
          'server.db.system' => 'sqlite'
        },
        Datadog.configuration.appsec.waf_timeout
      ).and_call_original
    )

    User.where(name: 'Bob').to_a
  end

  it 'calls waf with correct arguments when querying using .find_by_sql' do
    # Raw SQL is expected to reach the WAF exactly as written.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).with(
        {},
        {
          'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'",
          'server.db.system' => 'sqlite'
        },
        Datadog.configuration.appsec.waf_timeout
      ).and_call_original
    )

    User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a
  end

  it 'adds an event to processor context if waf status is :match' do
    # Stub the WAF to report a match so the event-recording path is exercised.
    expect(Datadog::AppSec.active_scope.processor_context).to(
      receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {}))
    )

    expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original

    User.where(name: 'Bob').to_a
  end
end
skip_if_profiling_not_supported(self) settings.profiling.enabled = true + # Disabled to avoid warnings on Rubies where it's not supported; there's separate specs that test it when enabled + settings.profiling.advanced.gc_enabled = false allow(profiler_setup_task).to receive(:run) end @@ -306,15 +308,15 @@ stub_const("RUBY_VERSION", testing_version) end - context "on a Ruby older than 2.7" do - let(:testing_version) { "2.6" } + context "on a Ruby older than 3.1" do + let(:testing_version) { "3.0" } it "initializes StackRecorder without heap sampling support and warns" do expect(Datadog::Profiling::StackRecorder).to receive(:new) .with(hash_including(heap_samples_enabled: false, heap_size_enabled: false)) .and_call_original - expect(Datadog.logger).to receive(:warn).with(/upgrade to Ruby >= 2.7/) + expect(Datadog.logger).to receive(:warn).with(/upgrade to Ruby >= 3.1/) build_profiler_component end @@ -365,23 +367,6 @@ build_profiler_component end end - - context "on a Ruby older than 3.1" do - let(:testing_version) { "2.7" } - - it "initializes StackRecorder with heap sampling support but shows warning and debug messages" do - expect(Datadog::Profiling::StackRecorder).to receive(:new) - .with(hash_including(heap_samples_enabled: true)) - .and_call_original - - expect(Datadog.logger).to receive(:debug).with(/Enabled allocation profiling/) - expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/) - expect(Datadog.logger).to receive(:warn).with(/experimental heap size profiling/) - expect(Datadog.logger).to receive(:debug).with(/forced object recycling.+upgrading to Ruby >= 3.1/) - - build_profiler_component - end - end end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 9b2c644a192..9f1ab10e6a0 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -666,136 +666,6 @@ def sample_allocation(obj) end end - context "on Rubies 
supporting rb_gc_force_recycle" do - before do - skip "rb_gc_force_recycle is a no-op in current Ruby version" if RUBY_VERSION >= "3.1" - @recycled_sample_allocation_line = 0 - end - - def has_seen_id_flag(obj) - described_class::Testing._native_has_seen_id_flag(obj) - end - - # This method attempts to allocate an object on a recycled heap slot. - # - # Heap slot recycling was a troublesome feature that has been removed from Rubies >= 3.1 - # in which an object could be freed through a fast-path that bypassed a lot of runtime - # machinery such as finalizers or object id tracking and thus introduced a fair amount - # of buggy behaviour. Some of this buggy behaviour manifests when a recycled slot gets - # re-used by a new live object: the new live object id will be the same as the id of - # the object that was recycled, violating a core constraint of Ruby objects: object ids - # are unique and non-repeatable. - # - # Recycling an object slot is easy (accomplished by a rb_gc_force_recycle native method call). - # More difficult is allocating an object on a recycled slot. Ruby gives us no control on - # where to allocate an object so we have to play a probability game. This method attempts to - # maximize our chances of quickly getting an object in a recycled slot by: - # 1. Force recycling 1000 objects. - # 2. Repeatedly allocating 1000 objects and keeping references to them, thus preventing GC - # from reclaiming their slots. - # 3. Checking if any of the ids of the 1000 recycled objects now map to a live object. If - # that happens, then we know that live object was allocated on a recycled slot and we - # can return it. - def create_obj_in_recycled_slot(should_sample_original: false) - # Force-recycle 1000 objects. - # NOTE: In theory, a single force recycle would suffice but the more recycled slots - # there are to use the more probable it is for a new allocation to use it. 
- recycled_obj_ids = [] - 1000.times do - obj = Object.new - sample_allocation(obj) if should_sample_original - @recycled_sample_allocation_line = __LINE__ - 1 - - # Get the id of the object we're about to recycle - recycled_obj_ids << obj.object_id - - # Force recycle the given object - described_class::Testing._native_gc_force_recycle(obj) - end - - # Repeatedly allocate objects until we find one that resolves to the id of one of - # the force recycled objects - objs = [] - 100.times do - # Instead of doing this one at a time which would be slow given id2ref will - # raise on failure, allocate a ton of objects each time, increasing the - # probability of getting a hit on each iteration - # NOTE: We keep the object references around to prevent GCs from constantly - # freeing up slots from the previous iteration. Thus each consecutive - # iteration should get one step closer to re-using one of the recycled - # slots. This should not lead to OOMs since we know there are 1000 - # free recycled slots available (we recycled them above). At the very - # limit we'd expect the Ruby VM to prefer to re-use those slots rather - # than expand heap pages and when that happens we'd stop iterating. 
- 1000.times { objs << Object.new } - recycled_obj_ids.each do |obj_id| - return ObjectSpace._id2ref(obj_id) - rescue RangeError # rubocop:disable Lint/SuppressedException - end - end - raise "could not allocate an object in a recycled slot" - end - - it "enforces seen id flag on objects on recycled slots that get sampled" do - recycled_obj = create_obj_in_recycled_slot - - expect(has_seen_id_flag(recycled_obj)).to be false - - sample_allocation(recycled_obj) - - expect(has_seen_id_flag(recycled_obj)).to be true - end - - it "enforces seen id flag on untracked objects that replace tracked recycled objects" do - recycled_obj = create_obj_in_recycled_slot(should_sample_original: true) - - expect(has_seen_id_flag(recycled_obj)).to be false - - serialize - - expect(has_seen_id_flag(recycled_obj)).to be true - end - - it "correctly handles lifecycle of objects on recycled slots that get sampled" do - recycled_obj = create_obj_in_recycled_slot - - sample_allocation(recycled_obj) - sample_line = __LINE__ - 1 - - GC.start # Ensure recycled sample has age > 0 so it shows up in serialized profile - - recycled_sample = heap_samples.find { |s| s.has_location?(path: __FILE__, line: sample_line) } - expect(recycled_sample).not_to be nil - end - - it "supports allocation samples with duplicate ids due to force recycling" do - recycled_obj = create_obj_in_recycled_slot(should_sample_original: true) - - expect { sample_allocation(recycled_obj) }.not_to raise_error - end - - it "raises on allocation samples with duplicate ids that are not due to force recycling" do - obj = Object.new - - sample_allocation(obj) - - expect { sample_allocation(obj) }.to raise_error(/supposed to be unique/) - end - - it "can detect implicit frees due to slot recycling" do - live_objects = [] - live_objects << create_obj_in_recycled_slot(should_sample_original: true) - - # If we act on implicit frees, then we assume that even though there's a live object - # in the same slot as the original one we were 
require 'json'
require "psych"
require 'ostruct'
require_relative 'appraisal_conversion'

# Rake tasks supporting the GitHub Actions "Unit Tests" workflow:
#
# * `github:actions:test_template` regenerates `.github/workflows/test.yml`.
# * `github:generate_matrix` prints the JSON job matrix consumed by that workflow's
#   `compute_tasks` job for the Ruby runtime it is executed on.
#
# rubocop:disable Metrics/BlockLength
namespace :github do
  namespace :actions do
    # Builds the workflow definition as plain Ruby hashes and writes it, as YAML,
    # to `.github/workflows/test.yml`. One `test-<alias>` job is emitted per entry
    # in `runtimes`, plus a single `compute_tasks` job that fans out over the same
    # runtimes to compute each matrix and upload the bundled dependencies.
    task :test_template do |t|
      ubuntu = "ubuntu-22.04"

      # Still being rate limited
      docker_login_credentials = {
        "username" => '${{ secrets.DOCKERHUB_USERNAME }}',
        "password" => '${{ secrets.DOCKERHUB_TOKEN }}'
      }

      postgres = {
        "image" => "postgres:9.6",
        "credentials" => docker_login_credentials,
        "env" => {
          "POSTGRES_PASSWORD" => "postgres",
          "POSTGRES_USER" => "postgres",
          "POSTGRES_DB" => "postgres",
        }
      }

      redis = {
        "image" => "redis:6.2",
        "credentials" => docker_login_credentials,
      }

      # Each "<engine>:<version>" entry becomes a struct carrying the job alias
      # (e.g. "ruby-33") and the container image to run on.
      runtimes = [
        "ruby:3.3",
        "ruby:3.2",
        # "ruby:3.1",
        # "ruby:3.0",
        # "ruby:2.7",
        # "ruby:2.6",
        # "ruby:2.5",
        # "jruby:9.4",
        # "jruby:9.3",
        # "jruby:9.2",
      ].map do |runtime|
        engine, version = runtime.split(':')
        runtime_alias = "#{engine}-#{version.delete('.')}"

        OpenStruct.new(
          "engine" => engine,
          "version" => version,
          "alias" => runtime_alias,
          "image" => "ghcr.io/datadog/images-rb/engines/#{engine}:#{version}"
        )
      end

      # One test job per runtime; its matrix is read from the matching
      # `compute_tasks` output at workflow run time.
      test_jobs = runtimes.map do |runtime|
        {
          "test-#{runtime.alias}" => {
            "name" => "#{runtime.engine}-#{runtime.version}: ${{ matrix.task }} (${{ matrix.group }})",
            "needs" => ["compute_tasks"],
            "runs-on" => ubuntu,
            "strategy" => {
              "fail-fast" => false,
              "matrix" => {
                "include" => "${{ fromJson(needs.compute_tasks.outputs.#{runtime.alias}-matrix) }}"
              }
            },
            "container" => {
              "image" => runtime.image,
              "env" => {
                "TEST_POSTGRES_HOST" => "postgres",
                "TEST_REDIS_HOST" => "redis",
              }
            },
            "services" => {
              "postgres" => postgres,
              "redis" => redis,
            },
            "steps" => [
              { "uses" => "actions/checkout@v4" },
              {
                "name" => "Configure Git",
                "run" => 'git config --global --add safe.directory "$GITHUB_WORKSPACE"'
              },
              {
                "uses" => "actions/download-artifact@v4",
                "with" => {
                  "name" => "bundled-dependencies-${{ github.run_id }}-#{runtime.alias}",
                }
              },
              { "run" => "bundle install --local" },
              {
                "name" => "Test ${{ matrix.task }} with ${{ matrix.gemfile }}",
                "env" => { "BUNDLE_GEMFILE" => "${{ matrix.gemfile }}" },
                "run" => "bundle install && bundle exec rake spec:${{ matrix.task }}"
              }
            ]
          }
        }
      end

      # Runs `github:generate_matrix` inside every runtime image and exposes the
      # result as the `<alias>-matrix` job output.
      compute_tasks = {
        "runs-on" => ubuntu,
        "strategy" => {
          "fail-fast" => false,
          "matrix" => {
            "engine" => runtimes.map do |runtime|
              { "name" => runtime.engine, "version" => runtime.version, "alias" => runtime.alias }
            end
          }
        },
        "container" => {
          "image" => "ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }}"
        },
        "outputs" => runtimes.each_with_object({}) do |runtime, hash|
          hash["#{runtime.alias}-matrix"] = "${{ steps.set-matrix.outputs.#{runtime.alias} }}"
        end,
        "steps" => [
          { "uses" => "actions/checkout@v4" },
          { "run" => "bundle install" },
          {
            "id" => "set-matrix",
            "run" => <<~BASH
              matrix_json=$(bundle exec rake github:generate_matrix)
              # Debug output
              echo "Generated JSON:"
              echo "$matrix_json"
              # Set the output
              echo "${{ matrix.engine.alias }}=$(echo "$matrix_json")" >> $GITHUB_OUTPUT
            BASH
          },
          { "run" => "bundle cache" },
          {
            "uses" => "actions/upload-artifact@v4",
            "with" => {
              "name" => "bundled-dependencies-${{ github.run_id }}-${{ matrix.engine.alias }}",
              "retention-days" => 1,
              "path" => <<~STRING
                Gemfile.lock
                vendor/
              STRING
            }
          },
        ]
      }

      base = {
        "name" => 'Unit Tests',
        "on" => {
          "push" => {
            "branches" => [
              "master",
              "poc/**",
            ]
          },
          "schedule" => [
            { "cron" => '0 7 * * *' }
          ]
        },
        "concurrency" => {
          "group" => '${{ github.workflow }}-${{ github.ref }}',
          "cancel-in-progress" => '${{ github.ref != \'refs/heads/master\' }}'
        },
        "jobs" => {
          "compute_tasks" => compute_tasks,
          **test_jobs.reduce(&:merge)
        }
      }

      # `Psych.dump` directly creates anchors, but Github Actions does not support anchors for YAML,
      # convert to JSON first to avoid anchors
      json = JSON.dump(base)
      yaml = Psych.safe_load(json)

      string = +""
      string << <<~EOS
        # Please do NOT manually edit this file.
        # This file is generated by 'bundle exec rake #{t.name}'
      EOS
      string << Psych.dump(yaml, line_width: 120)
      File.binwrite(".github/workflows/test.yml", string)
    end
  end

  # Prints, as a JSON array on stdout, one `{group, gemfile, task}` entry for
  # every (task, group) pair of the candidate spec tasks that the Matrixfile
  # marks as supported ("✅ <major>.<minor>") on the Ruby executing this task.
  # On JRuby the entry must additionally be marked "✅ jruby".
  #
  # Raises RuntimeError when `candidates` already covers every Matrixfile key,
  # because the candidate filter would then be pointless.
  task :generate_matrix do
    # NOTE: `Matrixfile` is a repository-local Ruby file and is evaluated as
    # trusted code; never point this at external input.
    matrix = eval(File.read('Matrixfile')).freeze # rubocop:disable Security/Eval

    candidates = %w[
      main
      pg
      rack
      redis
      sinatra
      stripe
    ]

    remainders = matrix.keys - candidates

    raise "No remainder found. Use the matrix directly (without candidate filtering)." if remainders.empty?

    matrix = matrix.slice(*candidates)

    # Use the first two version segments rather than the first three characters,
    # so that a two-digit minor version (e.g. "3.10.0") is not truncated to "3.1".
    ruby_version = RUBY_VERSION.split('.').first(2).join('.')
    on_jruby = RUBY_PLATFORM == 'java'

    array = []
    matrix.each do |key, spec_metadata|
      spec_metadata.each do |group, rubies|
        matched = rubies.include?("✅ #{ruby_version}")
        matched &&= rubies.include?('✅ jruby') if on_jruby

        next unless matched

        gemfile =
          begin
            AppraisalConversion.to_bundle_gemfile(group)
          rescue StandardError
            # Deliberate best-effort: groups the conversion cannot handle run
            # against the root Gemfile.
            "Gemfile"
          end

        array << {
          group: group,
          gemfile: gemfile,
          task: key
        }
      end
    end

    puts JSON.dump(array)
  end
end
# rubocop:enable Metrics/BlockLength