From d30cb1878c8603cce5a71ec66104d39ede19ef8d Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 2 Dec 2024 10:04:39 +0000 Subject: [PATCH 01/30] Minor: Disable gc profiling in specs to avoid warnings On Ruby 3.0, the default for `gc_enabled = true` emits a warning stating it can't be used. This warning is a bit annoying in our tests; let's disable it and only rely on it being enabled on the specs that are actually testing this feature. --- spec/datadog/profiling/component_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index f91d021c865..573c76b8bdf 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -52,6 +52,8 @@ skip_if_profiling_not_supported(self) settings.profiling.enabled = true + # Disabled to avoid warnings on Rubies where it's not supported; there's separate specs that test it when enabled + settings.profiling.advanced.gc_enabled = false allow(profiler_setup_task).to receive(:run) end From ea1a525fa369dcc60de04a0aef8e332c21b4e830 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 2 Dec 2024 10:09:01 +0000 Subject: [PATCH 02/30] Fix outdated comment It's no longer true that we don't use the new Ruby profiler on Ruby 2.5; what's true is that we enable the "no signals workaround" to avoid the potentially-problematic code path. --- ext/datadog_profiling_native_extension/private_vm_api_access.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/datadog_profiling_native_extension/private_vm_api_access.c b/ext/datadog_profiling_native_extension/private_vm_api_access.c index dbbebd531d0..59e5c644882 100644 --- a/ext/datadog_profiling_native_extension/private_vm_api_access.c +++ b/ext/datadog_profiling_native_extension/private_vm_api_access.c @@ -158,7 +158,7 @@ bool is_current_thread_holding_the_gvl(void) { // // Thus an incorrect `is_current_thread_holding_the_gvl` result may lead to issues inside `rb_postponed_job_register_one`. // - // For this reason we currently do not enable the new Ruby profiler on Ruby 2.5 by default, and we print a + // For this reason we default to use the "no signals workaround" on Ruby 2.5 by default, and we print a // warning when customers force-enable it. bool gvl_acquired = vm->gvl.acquired != 0; rb_thread_t *current_owner = vm->running_thread; From 997e30d583d6214349d0d8aebc4b6ca200af2904 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Thu, 14 Nov 2024 10:31:36 +0100 Subject: [PATCH 03/30] Add ActiveRecord instrumentation to detect SQLi in AppSec --- lib/datadog/appsec.rb | 1 + .../contrib/active_record/integration.rb | 41 +++++++++++++++++ .../active_record/mysql2_adapter_patch.rb | 39 ++++++++++++++++ .../appsec/contrib/active_record/patcher.rb | 45 +++++++++++++++++++ .../active_record/postgresql_adapter_patch.rb | 39 ++++++++++++++++ .../active_record/sqlite3_adapter_patch.rb | 40 +++++++++++++++++ 6 files changed, 205 insertions(+) create mode 100644 lib/datadog/appsec/contrib/active_record/integration.rb create mode 100644 lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb create mode 100644 lib/datadog/appsec/contrib/active_record/patcher.rb create mode 100644 lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb create mode 100644 lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb diff --git a/lib/datadog/appsec.rb b/lib/datadog/appsec.rb index 940bbc4b4b6..7341af4c790 100644 --- a/lib/datadog/appsec.rb +++ b/lib/datadog/appsec.rb @@ -53,6 +53,7 @@ def components end # Integrations +require_relative 'appsec/contrib/active_record/integration' require_relative 'appsec/contrib/rack/integration' require_relative 'appsec/contrib/sinatra/integration' require_relative 'appsec/contrib/rails/integration' diff --git a/lib/datadog/appsec/contrib/active_record/integration.rb b/lib/datadog/appsec/contrib/active_record/integration.rb new file mode 100644 index 00000000000..022067d4e19 --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/integration.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require_relative '../integration' +require_relative 'patcher' + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # Description of ActiveRecord integration + class Integration + include Datadog::AppSec::Contrib::Integration + + MINIMUM_VERSION = Gem::Version.new('4') + + register_as :active_record, auto_patch: false + + def self.version + Gem.loaded_specs['activerecord'] && Gem.loaded_specs['activerecord'].version + end + + def self.loaded? + !defined?(::ActiveRecord).nil? + end + + def self.compatible? + super && version >= MINIMUM_VERSION + end + + def self.auto_instrument? + true + end + + def patcher + Patcher + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb new file mode 100644 index 00000000000..cb889634aaa --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec module that will be prepended to ActiveRecord adapter + module Mysql2AdapterPatch + def internal_exec_query(sql, *args, **rest) + scope = AppSec.active_scope + return super unless scope + + ephemeral_data = { + 'server.db.statement' => sql, + 'server.db.system' => 'mysql' + } + + result = scope.processor_context.run({}, ephemeral_data, Datadog.configuration.appsec.waf_timeout) + + if result.status == :match + Datadog::AppSec::Event.tag_and_keep!(scope, result) + + event = { + waf_result: result, + trace: scope.trace, + span: scope.service_entry_span, + sql: sql, + actions: result.actions + } + scope.processor_context.events << event + end + + super + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/patcher.rb b/lib/datadog/appsec/contrib/active_record/patcher.rb new file mode 100644 index 00000000000..24806087519 --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/patcher.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require_relative '../patcher' +require_relative 'sqlite3_adapter_patch' +require_relative 'postgresql_adapter_patch' +require_relative 'mysql2_adapter_patch' + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec patcher module for ActiveRecord + module Patcher + include Datadog::AppSec::Contrib::Patcher + + module_function + + def patched? + Patcher.instance_variable_get(:@patched) + end + + def target_version + Integration.version + end + + def patch + ActiveSupport.on_load :active_record do + if defined? ::ActiveRecord::ConnectionAdapters::SQLite3Adapter + ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(SQLite3AdapterPatch) + end + + if defined? ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter + ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(PostgreSQLAdapterPatch) + end + + if defined? ::ActiveRecord::ConnectionAdapters::Mysql2Adapter + ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(Mysql2AdapterPatch) + end + end + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb new file mode 100644 index 00000000000..98f0e26f808 --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec module that will be prepended to ActiveRecord adapter + module PostgreSQLAdapterPatch + def internal_exec_query(sql, *args, **rest) + scope = AppSec.active_scope + return super unless scope + + ephemeral_data = { + 'server.db.statement' => sql, + 'server.db.system' => 'postgresql' + } + + result = scope.processor_context.run({}, ephemeral_data, Datadog.configuration.appsec.waf_timeout) + + if result.status == :match + Datadog::AppSec::Event.tag_and_keep!(scope, result) + + event = { + waf_result: result, + trace: scope.trace, + span: scope.service_entry_span, + actions: result.actions, + sql: sql + } + scope.processor_context.events << event + end + + super + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb new file mode 100644 index 00000000000..e8c0bfa6e8d --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec module that will be prepended to ActiveRecord adapter + module SQLite3AdapterPatch + def internal_exec_query(sql, *args, **rest) + scope = AppSec.active_scope + return super unless scope + + ephemeral_data = { + 'server.db.statement' => sql, + 'server.db.system' => 'sqlite' + } + + waf_timeout = Datadog.configuration.appsec.waf_timeout + result = scope.processor_context.run({}, ephemeral_data, waf_timeout) + + if result.status == :match + Datadog::AppSec::Event.tag_and_keep!(scope, result) + + event = { + waf_result: result, + trace: scope.trace, + span: scope.service_entry_span, + sql: sql, + actions: result.actions + } + scope.processor_context.events << event + end + + super + end + end + end + end + end +end From ff6863f2254f7e3edd98b6d997393d33cec107bd Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Fri, 22 Nov 2024 12:35:45 +0100 Subject: [PATCH 04/30] Add patching for older rails versions --- .../contrib/active_record/instrumentation.rb | 67 +++++++++++++++++++ .../active_record/mysql2_adapter_patch.rb | 39 ----------- .../appsec/contrib/active_record/patcher.rb | 20 ++++-- .../active_record/postgresql_adapter_patch.rb | 39 ----------- .../active_record/sqlite3_adapter_patch.rb | 40 ----------- 5 files changed, 81 insertions(+), 124 deletions(-) create mode 100644 lib/datadog/appsec/contrib/active_record/instrumentation.rb delete mode 100644 lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb delete mode 100644 lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb delete mode 100644 lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb diff --git a/lib/datadog/appsec/contrib/active_record/instrumentation.rb b/lib/datadog/appsec/contrib/active_record/instrumentation.rb new file mode 100644 index 00000000000..17ef8ee5d31 --- /dev/null +++ b/lib/datadog/appsec/contrib/active_record/instrumentation.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +module Datadog + module AppSec + module Contrib + module ActiveRecord + # AppSec module that will be prepended to ActiveRecord adapter + module Instrumentation + module_function + + def detect_sqli(sql, adapter_name) + scope = AppSec.active_scope + return unless scope + + ephemeral_data = { + 'server.db.statement' => sql, + 'server.db.system' => adapter_name.downcase.gsub(/\d{1}\z/, '') + } + + waf_timeout = Datadog.configuration.appsec.waf_timeout + result = scope.processor_context.run({}, ephemeral_data, waf_timeout) + + if result.status == :match + Datadog::AppSec::Event.tag_and_keep!(scope, result) + + event = { + waf_result: result, + trace: scope.trace, + span: scope.service_entry_span, + sql: sql, + actions: result.actions + } + scope.processor_context.events << event + end + end + + # patch for all adapters in ActiveRecord >= 7.1 + module InternalExecQueryAdapterPatch + def internal_exec_query(sql, *args, **rest) + Instrumentation.detect_sqli(sql, adapter_name) + + super + end + end + + # patch for postgres adapter in ActiveRecord < 7.1 + module ExecuteAndClearAdapterPatch + def execute_and_clear(sql, *args, **rest) + Instrumentation.detect_sqli(sql, adapter_name) + + super + end + end + + # patch for mysql2 and sqlite3 adapters in ActiveRecord < 7.1 + module ExecQueryAdapterPatch + def exec_query(sql, *args, **rest) + Instrumentation.detect_sqli(sql, adapter_name) + + super + end + end + end + end + end + end +end diff --git a/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb deleted file mode 100644 index cb889634aaa..00000000000 --- a/lib/datadog/appsec/contrib/active_record/mysql2_adapter_patch.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module Datadog - module AppSec - module Contrib - module ActiveRecord - # AppSec module that will be prepended to ActiveRecord adapter - module Mysql2AdapterPatch - def internal_exec_query(sql, *args, **rest) - scope = AppSec.active_scope - return super unless scope - - ephemeral_data = { - 'server.db.statement' => sql, - 'server.db.system' => 'mysql' - } - - result = scope.processor_context.run({}, ephemeral_data, Datadog.configuration.appsec.waf_timeout) - - if result.status == :match - Datadog::AppSec::Event.tag_and_keep!(scope, result) - - event = { - waf_result: result, - trace: scope.trace, - span: scope.service_entry_span, - sql: sql, - actions: result.actions - } - scope.processor_context.events << event - end - - super - end - end - end - end - end -end diff --git a/lib/datadog/appsec/contrib/active_record/patcher.rb b/lib/datadog/appsec/contrib/active_record/patcher.rb index 24806087519..61acdab9bce 100644 --- a/lib/datadog/appsec/contrib/active_record/patcher.rb +++ b/lib/datadog/appsec/contrib/active_record/patcher.rb @@ -1,9 +1,7 @@ # frozen_string_literal: true require_relative '../patcher' -require_relative 'sqlite3_adapter_patch' -require_relative 'postgresql_adapter_patch' -require_relative 'mysql2_adapter_patch' +require_relative 'instrumentation' module Datadog module AppSec @@ -26,18 +24,28 @@ def target_version def patch ActiveSupport.on_load :active_record do if defined? ::ActiveRecord::ConnectionAdapters::SQLite3Adapter - ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(SQLite3AdapterPatch) + ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(Patcher.prepended_class_name(:sqlite3)) end if defined? ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter - ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(PostgreSQLAdapterPatch) + ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(Patcher.prepended_class_name(:postgresql)) end if defined? ::ActiveRecord::ConnectionAdapters::Mysql2Adapter - ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(Mysql2AdapterPatch) + ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(Patcher.prepended_class_name(:mysql2)) end end end + + def prepended_class_name(adapter_name) + if ::ActiveRecord.gem_version >= Gem::Version.new('7.1') + Instrumentation::InternalExecQueryAdapterPatch + elsif adapter_name == :postgresql + Instrumentation::ExecuteAndClearAdapterPatch + else + Instrumentation::ExecQueryAdapterPatch + end + end end end end diff --git a/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb deleted file mode 100644 index 98f0e26f808..00000000000 --- a/lib/datadog/appsec/contrib/active_record/postgresql_adapter_patch.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -module Datadog - module AppSec - module Contrib - module ActiveRecord - # AppSec module that will be prepended to ActiveRecord adapter - module PostgreSQLAdapterPatch - def internal_exec_query(sql, *args, **rest) - scope = AppSec.active_scope - return super unless scope - - ephemeral_data = { - 'server.db.statement' => sql, - 'server.db.system' => 'postgresql' - } - - result = scope.processor_context.run({}, ephemeral_data, Datadog.configuration.appsec.waf_timeout) - - if result.status == :match - Datadog::AppSec::Event.tag_and_keep!(scope, result) - - event = { - waf_result: result, - trace: scope.trace, - span: scope.service_entry_span, - actions: result.actions, - sql: sql - } - scope.processor_context.events << event - end - - super - end - end - end - end - end -end diff --git a/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb b/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb deleted file mode 100644 index e8c0bfa6e8d..00000000000 --- a/lib/datadog/appsec/contrib/active_record/sqlite3_adapter_patch.rb +++ /dev/null @@ -1,40 +0,0 @@ -# frozen_string_literal: true - -module Datadog - module AppSec - module Contrib - module ActiveRecord - # AppSec module that will be prepended to ActiveRecord adapter - module SQLite3AdapterPatch - def internal_exec_query(sql, *args, **rest) - scope = AppSec.active_scope - return super unless scope - - ephemeral_data = { - 'server.db.statement' => sql, - 'server.db.system' => 'sqlite' - } - - waf_timeout = Datadog.configuration.appsec.waf_timeout - result = scope.processor_context.run({}, ephemeral_data, waf_timeout) - - if result.status == :match - Datadog::AppSec::Event.tag_and_keep!(scope, result) - - event = { - waf_result: result, - trace: scope.trace, - span: scope.service_entry_span, - sql: sql, - actions: result.actions - } - scope.processor_context.events << event - end - - super - end - end - end - end - end -end From 49eb123b228a91ff4f3c869a39c1fede475e7d09 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Mon, 25 Nov 2024 13:48:50 +0100 Subject: [PATCH 05/30] Add specs for AppSec ActiveRecord SQLi detection --- Matrixfile | 3 + Rakefile | 3 +- .../active_record/multi_adapter_spec.rb | 150 ++++++++++++++++++ .../contrib/active_record/patcher_spec.rb | 44 +++++ 4 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb create mode 100644 spec/datadog/appsec/contrib/active_record/patcher_spec.rb diff --git a/Matrixfile b/Matrixfile index 304a1a3a48e..2fa275e5f14 100644 --- a/Matrixfile +++ b/Matrixfile @@ -258,6 +258,9 @@ 'redis-4' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'redis-5' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby' }, + 'appsec:active_record' => { + 'relational_db' => '❌ 2.5 / ❌ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', + }, 'appsec:rack' => { 'rack-latest' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', 'rack-3' => '✅ 2.5 / ✅ 2.6 / ✅ 2.7 / ✅ 3.0 / ✅ 3.1 / ✅ 3.2 / ✅ 3.3 / ✅ 3.4 / ✅ jruby', diff --git a/Rakefile b/Rakefile index 41426800e51..5ddc8e14b16 100644 --- a/Rakefile +++ b/Rakefile @@ -267,7 +267,7 @@ namespace :spec do end namespace :appsec do - task all: [:main, :rack, :rails, :sinatra, :devise, :graphql] + task all: [:main, :active_record, :rack, :rails, :sinatra, :devise, :graphql] # Datadog AppSec main specs desc '' # "Explicitly hiding from `rake -T`" @@ -280,6 +280,7 @@ namespace :spec do # Datadog AppSec integrations [ + :active_record, :rack, :sinatra, :rails, diff --git a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb new file mode 100644 index 00000000000..7684087f8be --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb @@ -0,0 +1,150 @@ +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +require 'mysql2' +require 'sqlite3' +require 'pg' + +RSpec.describe 'AppSec ActiveRecord integration' do + let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } + let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } + let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } + let(:context) { processor.new_context } + + let(:span) { Datadog::Tracing::SpanOperation.new('root') } + let(:trace) { Datadog::Tracing::TraceOperation.new } + + let!(:user_class) do + stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| + klass.establish_connection(db_config) + + klass.connection.create_table 'users', force: :cascade do |t| + t.string :name, null: false + t.string :email, null: false + t.timestamps + end + + # prevent internal sql requests from showing up + klass.count + end + end + + before do + Datadog.configure do |c| + c.appsec.enabled = true + c.appsec.instrument :active_record + end + + Datadog::AppSec::Scope.activate_scope(trace, span, processor) + + raise_on_rails_deprecation! + end + + after do + Datadog.configuration.reset! + + Datadog::AppSec::Scope.deactivate_scope + processor.finalize + end + + shared_examples 'calls_waf_with_correct_arguments' do + it 'calls waf with correct arguments' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => expected_db_statement, + 'server.db.system' => expected_db_system + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + active_record_scope.to_a + end + end + + context 'mysql2 adapter' do + let(:db_config) do + { + adapter: 'mysql2', + database: ENV.fetch('TEST_MYSQL_DB', 'mysql'), + host: ENV.fetch('TEST_MYSQL_HOST', '127.0.0.1'), + password: ENV.fetch('TEST_MYSQL_ROOT_PASSWORD', 'root'), + port: ENV.fetch('TEST_MYSQL_PORT', '3306') + } + end + + let(:expected_db_system) { 'mysql' } + + context 'when using .where' do + let(:active_record_scope) { User.where(name: 'Bob') } + let(:expected_db_statement) { "SELECT `users`.* FROM `users` WHERE `users`.`name` = 'Bob'" } + + include_examples 'calls_waf_with_correct_arguments' + end + + context 'when using .find_by_sql' do + let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } + let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } + + include_examples 'calls_waf_with_correct_arguments' + end + end + + context 'postgres adapter' do + let(:db_config) do + { + adapter: 'postgresql', + database: ENV.fetch('TEST_POSTGRES_DB', 'postgres'), + host: ENV.fetch('TEST_POSTGRES_HOST', '127.0.0.1'), + port: ENV.fetch('TEST_POSTGRES_PORT', 5432), + username: ENV.fetch('TEST_POSTGRES_USER', 'postgres'), + password: ENV.fetch('TEST_POSTGRES_PASSWORD', 'postgres') + } + end + + let(:expected_db_system) { 'postgresql' } + + context 'when using .where' do + let(:active_record_scope) { User.where(name: 'Bob') } + let(:expected_db_statement) { 'SELECT "users".* FROM "users" WHERE "users"."name" = $1' } + + include_examples 'calls_waf_with_correct_arguments' + end + + context 'when using .find_by_sql' do + let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } + let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } + + include_examples 'calls_waf_with_correct_arguments' + end + end + + context 'sqlite3 adapter' do + let(:db_config) do + { + adapter: 'sqlite3', + database: ':memory:' + } + end + + let(:expected_db_system) { 'sqlite' } + + context 'when using .where' do + let(:active_record_scope) { User.where(name: 'Bob') } + let(:expected_db_statement) { 'SELECT "users".* FROM "users" WHERE "users"."name" = ?' } + + include_examples 'calls_waf_with_correct_arguments' + end + + context 'when using .find_by_sql' do + let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } + let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } + + include_examples 'calls_waf_with_correct_arguments' + end + end +end diff --git a/spec/datadog/appsec/contrib/active_record/patcher_spec.rb b/spec/datadog/appsec/contrib/active_record/patcher_spec.rb new file mode 100644 index 00000000000..976d3a0eca6 --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/patcher_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'datadog/appsec/contrib/active_record/patcher' + +RSpec.describe Datadog::AppSec::Contrib::ActiveRecord::Patcher do + describe '#prepended_class_name' do + before do + stub_const('::ActiveRecord', Struct.new(:gem_version).new(Gem::Version.new(active_record_version))) + end + + context 'when ActiveRecord version is 7.1 or higher' do + let(:active_record_version) { Gem::Version.new('7.1') } + + it 'returns Instrumentation::InternalExecQueryAdapterPatch' do + expect(described_class.prepended_class_name(:postgresql)).to eq( + Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::InternalExecQueryAdapterPatch + ) + end + end + + context 'when ActiveRecord version is lower than 7.1' do + let(:active_record_version) { Gem::Version.new('7.0') } + + context 'for postgresql adapter' do + it 'returns Instrumentation::ExecuteAndClearAdapterPatch' do + expect(described_class.prepended_class_name(:postgresql)).to eq( + Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecuteAndClearAdapterPatch + ) + end + end + + %i[mysql2 sqlite3].each do |adapter_name| + context "for #{adapter_name} adapter" do + it 'returns Instrumentation::ExecQueryAdapterPatch' do + expect(described_class.prepended_class_name(adapter_name)).to eq( + Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecQueryAdapterPatch + ) + end + end + end + end + end +end From 1f0a7a1d7ea2da92af8e5ca5043889cf2b98d450 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Tue, 26 Nov 2024 16:53:05 +0100 Subject: [PATCH 06/30] Add RBS types for AppSec ActiveRecord instrumentation --- .../contrib/active_record/instrumentation.rbs | 23 +++++++++++++++++++ .../contrib/active_record/integration.rbs | 23 +++++++++++++++++++ .../appsec/contrib/active_record/patcher.rbs | 19 +++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 sig/datadog/appsec/contrib/active_record/instrumentation.rbs create mode 100644 sig/datadog/appsec/contrib/active_record/integration.rbs create mode 100644 sig/datadog/appsec/contrib/active_record/patcher.rbs diff --git a/sig/datadog/appsec/contrib/active_record/instrumentation.rbs b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs new file mode 100644 index 00000000000..b81b4138642 --- /dev/null +++ b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs @@ -0,0 +1,23 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + module Instrumentation + def self?.detect_sqli: (String sql, String adapter_name) -> void + + module InternalExecQueryAdapterPatch + def internal_exec_query: (String sql, *untyped args, **untyped rest) -> untyped + end + + module ExecuteAndClearAdapterPatch + def execute_and_clear: (String sql, *untyped args, **untyped rest) -> untyped + end + + module ExecQueryAdapterPatch + def exec_query: (String sql, *untyped args, **untyped rest) -> untyped + end + end + end + end + end +end diff --git a/sig/datadog/appsec/contrib/active_record/integration.rbs b/sig/datadog/appsec/contrib/active_record/integration.rbs new file mode 100644 index 00000000000..781f186efc1 --- /dev/null +++ b/sig/datadog/appsec/contrib/active_record/integration.rbs @@ -0,0 +1,23 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + class Integration + include Datadog::AppSec::Contrib::Integration + + MINIMUM_VERSION: Gem::Version + + def self.version: () -> Gem::Version? + + def self.loaded?: () -> bool + + def self.compatible?: () -> bool + + def self.auto_instrument?: () -> true + + def patcher: () -> class + end + end + end + end +end diff --git a/sig/datadog/appsec/contrib/active_record/patcher.rbs b/sig/datadog/appsec/contrib/active_record/patcher.rbs new file mode 100644 index 00000000000..609b75bf6fb --- /dev/null +++ b/sig/datadog/appsec/contrib/active_record/patcher.rbs @@ -0,0 +1,19 @@ +module Datadog + module AppSec + module Contrib + module ActiveRecord + module Patcher + include Datadog::AppSec::Contrib::Patcher + + def self?.patched?: () -> bool + + def self?.target_version: () -> Gem::Version? + + def self?.patch: () -> void + + def self?.prepended_class_name: (Symbol adapter_name) -> class + end + end + end + end +end From 2edc1a0473a35b6347bdae88910dd1702e992e26 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Tue, 26 Nov 2024 17:48:25 +0100 Subject: [PATCH 07/30] Fix AppSec ActiveRecord integration specs for JRuby --- .../contrib/active_record/instrumentation.rb | 1 + .../appsec/contrib/active_record/patcher.rb | 2 +- .../active_record/multi_adapter_spec.rb | 19 ++++++++-- .../contrib/active_record/patcher_spec.rb | 38 ++++++++++++++++--- 4 files changed, 49 insertions(+), 11 deletions(-) diff --git a/lib/datadog/appsec/contrib/active_record/instrumentation.rb b/lib/datadog/appsec/contrib/active_record/instrumentation.rb index 17ef8ee5d31..e8103410e35 100644 --- a/lib/datadog/appsec/contrib/active_record/instrumentation.rb +++ b/lib/datadog/appsec/contrib/active_record/instrumentation.rb @@ -53,6 +53,7 @@ def execute_and_clear(sql, *args, **rest) end # patch for mysql2 and sqlite3 adapters in ActiveRecord < 7.1 + # this patch is also used when using JDBC adapter module ExecQueryAdapterPatch def exec_query(sql, *args, **rest) Instrumentation.detect_sqli(sql, adapter_name) diff --git a/lib/datadog/appsec/contrib/active_record/patcher.rb b/lib/datadog/appsec/contrib/active_record/patcher.rb index 61acdab9bce..b6e65b97d51 100644 --- a/lib/datadog/appsec/contrib/active_record/patcher.rb +++ b/lib/datadog/appsec/contrib/active_record/patcher.rb @@ -40,7 +40,7 @@ def patch def prepended_class_name(adapter_name) if ::ActiveRecord.gem_version >= Gem::Version.new('7.1') Instrumentation::InternalExecQueryAdapterPatch - elsif adapter_name == :postgresql + elsif adapter_name == :postgresql && !defined?(::ActiveRecord::ConnectionAdapters::JdbcAdapter) Instrumentation::ExecuteAndClearAdapterPatch else Instrumentation::ExecQueryAdapterPatch diff --git a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb index 7684087f8be..7ce35364975 100644 --- a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb +++ b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb @@ -3,9 +3,13 @@ require 'spec/datadog/tracing/contrib/rails/support/deprecation' -require 'mysql2' -require 'sqlite3' -require 'pg' +if PlatformHelpers.jruby? + require 'activerecord-jdbc-adapter' +else + require 'mysql2' + require 'sqlite3' + require 'pg' +end RSpec.describe 'AppSec ActiveRecord integration' do let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } @@ -28,6 +32,7 @@ # prevent internal sql requests from showing up klass.count + klass.first end end @@ -110,7 +115,13 @@ context 'when using .where' do let(:active_record_scope) { User.where(name: 'Bob') } - let(:expected_db_statement) { 'SELECT "users".* FROM "users" WHERE "users"."name" = $1' } + let(:expected_db_statement) do + if PlatformHelpers.jruby? + 'SELECT "users".* FROM "users" WHERE "users"."name" = ?' + else + 'SELECT "users".* FROM "users" WHERE "users"."name" = $1' + end + end include_examples 'calls_waf_with_correct_arguments' end diff --git a/spec/datadog/appsec/contrib/active_record/patcher_spec.rb b/spec/datadog/appsec/contrib/active_record/patcher_spec.rb index 976d3a0eca6..468b9631a15 100644 --- a/spec/datadog/appsec/contrib/active_record/patcher_spec.rb +++ b/spec/datadog/appsec/contrib/active_record/patcher_spec.rb @@ -5,12 +5,17 @@ RSpec.describe Datadog::AppSec::Contrib::ActiveRecord::Patcher do describe '#prepended_class_name' do - before do - stub_const('::ActiveRecord', Struct.new(:gem_version).new(Gem::Version.new(active_record_version))) - end - context 'when ActiveRecord version is 7.1 or higher' do - let(:active_record_version) { Gem::Version.new('7.1') } + before do + stub_const( + '::ActiveRecord', + Module.new do + module_function def gem_version + Gem::Version.new('7.1') + end + end + ) + end it 'returns Instrumentation::InternalExecQueryAdapterPatch' do expect(described_class.prepended_class_name(:postgresql)).to eq( @@ -20,9 +25,30 @@ end context 'when ActiveRecord version is lower than 7.1' do - let(:active_record_version) { Gem::Version.new('7.0') } + before do + stub_const( + '::ActiveRecord', + Module.new do + module_function def gem_version + Gem::Version.new('7.0') + end + end + ) + end context 'for postgresql adapter' do + context 'when ActiveRecord::ConnectionAdapters::JdbcAdapter is defined' do + before do + stub_const('::ActiveRecord::ConnectionAdapters::JdbcAdapter', Class.new) + end + + it 'returns Instrumentation::ExecQueryAdapterPatch' do + expect(described_class.prepended_class_name(:postgresql)).to eq( + Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecQueryAdapterPatch + ) + end + end + it 'returns Instrumentation::ExecuteAndClearAdapterPatch' do expect(described_class.prepended_class_name(:postgresql)).to eq( Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecuteAndClearAdapterPatch From a2e8ab487dd6a2d77df436be53b8d5726f32ef53 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Wed, 27 Nov 2024 13:48:46 +0100 Subject: [PATCH 08/30] Rename .detect_sqli to .detect_sql_injection --- .../appsec/contrib/active_record/instrumentation.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/datadog/appsec/contrib/active_record/instrumentation.rb b/lib/datadog/appsec/contrib/active_record/instrumentation.rb index e8103410e35..85ca0fc595d 100644 --- a/lib/datadog/appsec/contrib/active_record/instrumentation.rb +++ b/lib/datadog/appsec/contrib/active_record/instrumentation.rb @@ -8,7 +8,7 @@ module ActiveRecord module Instrumentation module_function - def detect_sqli(sql, adapter_name) + def detect_sql_injection(sql, adapter_name) scope = AppSec.active_scope return unless scope @@ -37,7 +37,7 @@ def detect_sqli(sql, adapter_name) # patch for all adapters in ActiveRecord >= 7.1 module InternalExecQueryAdapterPatch def internal_exec_query(sql, *args, **rest) - Instrumentation.detect_sqli(sql, adapter_name) + Instrumentation.detect_sql_injection(sql, adapter_name) super end @@ -46,7 +46,7 @@ def internal_exec_query(sql, *args, **rest) # patch for postgres adapter in ActiveRecord < 7.1 module ExecuteAndClearAdapterPatch def execute_and_clear(sql, *args, **rest) - Instrumentation.detect_sqli(sql, adapter_name) + Instrumentation.detect_sql_injection(sql, adapter_name) super end @@ -56,7 +56,7 @@ def execute_and_clear(sql, *args, **rest) # this patch is also used when using JDBC adapter module ExecQueryAdapterPatch def exec_query(sql, *args, **rest) - Instrumentation.detect_sqli(sql, adapter_name) + Instrumentation.detect_sql_injection(sql, adapter_name) super end From 776d405fba6b9afe4f53eb792595f9a8887485f7 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Wed, 27 Nov 2024 16:27:40 +0100 Subject: [PATCH 09/30] Add specs for AppSec ActiveRecord instrumentation --- .../appsec/contrib/active_record/multi_adapter_spec.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb index 7ce35364975..4080ec0e426 100644 --- a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb +++ b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb @@ -69,6 +69,16 @@ active_record_scope.to_a end + + it 'adds an event to processor context if waf status is :match' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).and_return(double(status: :match, actions: {})) + ) + + expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original + + active_record_scope.to_a + end end context 'mysql2 adapter' do From ee4c3a204f084f5733950a6699e33c823ffa0fc0 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Fri, 29 Nov 2024 15:23:13 +0100 Subject: [PATCH 10/30] Simplify AppSec ActiveRecord patcher and split specs by adapter --- .../contrib/active_record/instrumentation.rb | 7 +- .../contrib/active_record/integration.rb | 2 +- .../appsec/contrib/active_record/patcher.rb | 32 ++-- .../contrib/active_record/instrumentation.rbs | 2 +- .../appsec/contrib/active_record/patcher.rbs | 2 - .../active_record/multi_adapter_spec.rb | 171 ------------------ .../active_record/mysql2_adapter_spec.rb | 106 +++++++++++ .../contrib/active_record/patcher_spec.rb | 70 ------- .../active_record/postgresql_adapter_spec.rb | 113 ++++++++++++ .../active_record/sqlite3_adapter_spec.rb | 103 +++++++++++ 10 files changed, 346 insertions(+), 262 deletions(-) delete mode 100644 spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb create mode 100644 spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb delete mode 100644 spec/datadog/appsec/contrib/active_record/patcher_spec.rb create mode 100644 spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb create mode 100644 spec/datadog/appsec/contrib/active_record/sqlite3_adapter_spec.rb diff --git a/lib/datadog/appsec/contrib/active_record/instrumentation.rb b/lib/datadog/appsec/contrib/active_record/instrumentation.rb index 85ca0fc595d..3b226a9aafb 100644 --- a/lib/datadog/appsec/contrib/active_record/instrumentation.rb +++ b/lib/datadog/appsec/contrib/active_record/instrumentation.rb @@ -12,9 +12,14 @@ def detect_sql_injection(sql, adapter_name) scope = AppSec.active_scope return unless scope + # libddwaf expects db system to be lowercase, + # in case of sqlite adapter, libddwaf expects 'sqlite' as db system + db_system = adapter_name.downcase + db_system = 'sqlite' if db_system == 'sqlite3' + ephemeral_data = { 'server.db.statement' => sql, - 'server.db.system' => adapter_name.downcase.gsub(/\d{1}\z/, '') + 'server.db.system' => db_system } waf_timeout = Datadog.configuration.appsec.waf_timeout diff --git a/lib/datadog/appsec/contrib/active_record/integration.rb b/lib/datadog/appsec/contrib/active_record/integration.rb index 022067d4e19..00002f491d8 100644 --- a/lib/datadog/appsec/contrib/active_record/integration.rb +++ b/lib/datadog/appsec/contrib/active_record/integration.rb @@ -7,7 +7,7 @@ module Datadog module AppSec module Contrib module ActiveRecord - # Description of ActiveRecord integration + # This class provides helper methods that are used when patching ActiveRecord class Integration include Datadog::AppSec::Contrib::Integration diff --git a/lib/datadog/appsec/contrib/active_record/patcher.rb b/lib/datadog/appsec/contrib/active_record/patcher.rb index b6e65b97d51..dd0c6c220ae 100644 --- a/lib/datadog/appsec/contrib/active_record/patcher.rb +++ b/lib/datadog/appsec/contrib/active_record/patcher.rb @@ -23,27 +23,27 @@ def target_version def patch ActiveSupport.on_load :active_record do - if defined? ::ActiveRecord::ConnectionAdapters::SQLite3Adapter - ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(Patcher.prepended_class_name(:sqlite3)) + instrumentation_module = if ::ActiveRecord.gem_version >= Gem::Version.new('7.1') + Instrumentation::InternalExecQueryAdapterPatch + else + Instrumentation::ExecQueryAdapterPatch + end + + if defined?(::ActiveRecord::ConnectionAdapters::SQLite3Adapter) + ::ActiveRecord::ConnectionAdapters::SQLite3Adapter.prepend(instrumentation_module) end - if defined? ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter - ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(Patcher.prepended_class_name(:postgresql)) + if defined?(::ActiveRecord::ConnectionAdapters::Mysql2Adapter) + ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(instrumentation_module) end - if defined? ::ActiveRecord::ConnectionAdapters::Mysql2Adapter - ::ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(Patcher.prepended_class_name(:mysql2)) - end - end - end + if defined?(::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter) + unless defined?(::ActiveRecord::ConnectionAdapters::JdbcAdapter) + instrumentation_module = Instrumentation::ExecuteAndClearAdapterPatch + end - def prepended_class_name(adapter_name) - if ::ActiveRecord.gem_version >= Gem::Version.new('7.1') - Instrumentation::InternalExecQueryAdapterPatch - elsif adapter_name == :postgresql && !defined?(::ActiveRecord::ConnectionAdapters::JdbcAdapter) - Instrumentation::ExecuteAndClearAdapterPatch - else - Instrumentation::ExecQueryAdapterPatch + ::ActiveRecord::ConnectionAdapters::PostgreSQLAdapter.prepend(instrumentation_module) + end end end end diff --git a/sig/datadog/appsec/contrib/active_record/instrumentation.rbs b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs index b81b4138642..daea0a9f684 100644 --- a/sig/datadog/appsec/contrib/active_record/instrumentation.rbs +++ b/sig/datadog/appsec/contrib/active_record/instrumentation.rbs @@ -3,7 +3,7 @@ module Datadog module Contrib module ActiveRecord module Instrumentation - def self?.detect_sqli: (String sql, String adapter_name) -> void + def self?.detect_sql_injection: (String sql, String adapter_name) -> void module InternalExecQueryAdapterPatch def internal_exec_query: (String sql, *untyped args, **untyped rest) -> untyped diff --git a/sig/datadog/appsec/contrib/active_record/patcher.rbs b/sig/datadog/appsec/contrib/active_record/patcher.rbs index 609b75bf6fb..2850b974c00 100644 --- a/sig/datadog/appsec/contrib/active_record/patcher.rbs +++ b/sig/datadog/appsec/contrib/active_record/patcher.rbs @@ -10,8 +10,6 @@ module Datadog def self?.target_version: () -> Gem::Version? def self?.patch: () -> void - - def self?.prepended_class_name: (Symbol adapter_name) -> class end end end diff --git a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb deleted file mode 100644 index 4080ec0e426..00000000000 --- a/spec/datadog/appsec/contrib/active_record/multi_adapter_spec.rb +++ /dev/null @@ -1,171 +0,0 @@ -require 'datadog/appsec/spec_helper' -require 'active_record' - -require 'spec/datadog/tracing/contrib/rails/support/deprecation' - -if PlatformHelpers.jruby? - require 'activerecord-jdbc-adapter' -else - require 'mysql2' - require 'sqlite3' - require 'pg' -end - -RSpec.describe 'AppSec ActiveRecord integration' do - let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } - let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } - let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } - let(:context) { processor.new_context } - - let(:span) { Datadog::Tracing::SpanOperation.new('root') } - let(:trace) { Datadog::Tracing::TraceOperation.new } - - let!(:user_class) do - stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| - klass.establish_connection(db_config) - - klass.connection.create_table 'users', force: :cascade do |t| - t.string :name, null: false - t.string :email, null: false - t.timestamps - end - - # prevent internal sql requests from showing up - klass.count - klass.first - end - end - - before do - Datadog.configure do |c| - c.appsec.enabled = true - c.appsec.instrument :active_record - end - - Datadog::AppSec::Scope.activate_scope(trace, span, processor) - - raise_on_rails_deprecation! - end - - after do - Datadog.configuration.reset! - - Datadog::AppSec::Scope.deactivate_scope - processor.finalize - end - - shared_examples 'calls_waf_with_correct_arguments' do - it 'calls waf with correct arguments' do - expect(Datadog::AppSec.active_scope.processor_context).to( - receive(:run).with( - {}, - { - 'server.db.statement' => expected_db_statement, - 'server.db.system' => expected_db_system - }, - Datadog.configuration.appsec.waf_timeout - ).and_call_original - ) - - active_record_scope.to_a - end - - it 'adds an event to processor context if waf status is :match' do - expect(Datadog::AppSec.active_scope.processor_context).to( - receive(:run).and_return(double(status: :match, actions: {})) - ) - - expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original - - active_record_scope.to_a - end - end - - context 'mysql2 adapter' do - let(:db_config) do - { - adapter: 'mysql2', - database: ENV.fetch('TEST_MYSQL_DB', 'mysql'), - host: ENV.fetch('TEST_MYSQL_HOST', '127.0.0.1'), - password: ENV.fetch('TEST_MYSQL_ROOT_PASSWORD', 'root'), - port: ENV.fetch('TEST_MYSQL_PORT', '3306') - } - end - - let(:expected_db_system) { 'mysql' } - - context 'when using .where' do - let(:active_record_scope) { User.where(name: 'Bob') } - let(:expected_db_statement) { "SELECT `users`.* FROM `users` WHERE `users`.`name` = 'Bob'" } - - include_examples 'calls_waf_with_correct_arguments' - end - - context 'when using .find_by_sql' do - let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } - let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } - - include_examples 'calls_waf_with_correct_arguments' - end - end - - context 'postgres adapter' do - let(:db_config) do - { - adapter: 'postgresql', - database: ENV.fetch('TEST_POSTGRES_DB', 'postgres'), - host: ENV.fetch('TEST_POSTGRES_HOST', '127.0.0.1'), - port: ENV.fetch('TEST_POSTGRES_PORT', 5432), - username: ENV.fetch('TEST_POSTGRES_USER', 'postgres'), - password: ENV.fetch('TEST_POSTGRES_PASSWORD', 'postgres') - } - end - - let(:expected_db_system) { 'postgresql' } - - context 'when using .where' do - let(:active_record_scope) { User.where(name: 'Bob') } - let(:expected_db_statement) do - if PlatformHelpers.jruby? - 'SELECT "users".* FROM "users" WHERE "users"."name" = ?' - else - 'SELECT "users".* FROM "users" WHERE "users"."name" = $1' - end - end - - include_examples 'calls_waf_with_correct_arguments' - end - - context 'when using .find_by_sql' do - let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } - let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } - - include_examples 'calls_waf_with_correct_arguments' - end - end - - context 'sqlite3 adapter' do - let(:db_config) do - { - adapter: 'sqlite3', - database: ':memory:' - } - end - - let(:expected_db_system) { 'sqlite' } - - context 'when using .where' do - let(:active_record_scope) { User.where(name: 'Bob') } - let(:expected_db_statement) { 'SELECT "users".* FROM "users" WHERE "users"."name" = ?' } - - include_examples 'calls_waf_with_correct_arguments' - end - - context 'when using .find_by_sql' do - let(:active_record_scope) { User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'") } - let(:expected_db_statement) { "SELECT * FROM users WHERE name = 'Bob'" } - - include_examples 'calls_waf_with_correct_arguments' - end - end -end diff --git a/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb new file mode 100644 index 00000000000..6c5777adc1f --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/mysql2_adapter_spec.rb @@ -0,0 +1,106 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +if PlatformHelpers.jruby? + require 'activerecord-jdbc-adapter' +else + require 'mysql2' +end + +RSpec.describe 'AppSec ActiveRecord integration for Mysql2 adapter' do + let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } + let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } + let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } + let(:context) { processor.new_context } + + let(:span) { Datadog::Tracing::SpanOperation.new('root') } + let(:trace) { Datadog::Tracing::TraceOperation.new } + + let!(:user_class) do + stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| + klass.establish_connection(db_config) + + klass.connection.create_table 'users', force: :cascade do |t| + t.string :name, null: false + t.string :email, null: false + t.timestamps + end + + # prevent internal sql requests from showing up + klass.count + klass.first + end + end + + let(:db_config) do + { + adapter: 'mysql2', + database: ENV.fetch('TEST_MYSQL_DB', 'mysql'), + host: ENV.fetch('TEST_MYSQL_HOST', '127.0.0.1'), + password: ENV.fetch('TEST_MYSQL_ROOT_PASSWORD', 'root'), + port: ENV.fetch('TEST_MYSQL_PORT', '3306') + } + end + + before do + Datadog.configure do |c| + c.appsec.enabled = true + c.appsec.instrument :active_record + end + + Datadog::AppSec::Scope.activate_scope(trace, span, processor) + + raise_on_rails_deprecation! + end + + after do + Datadog.configuration.reset! + + Datadog::AppSec::Scope.deactivate_scope + processor.finalize + end + + it 'calls waf with correct arguments when querying using .where' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT `users`.* FROM `users` WHERE `users`.`name` = 'Bob'", + 'server.db.system' => 'mysql2' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.where(name: 'Bob').to_a + end + + it 'calls waf with correct arguments when querying using .find_by_sql' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'", + 'server.db.system' => 'mysql2' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a + end + + it 'adds an event to processor context if waf status is :match' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {})) + ) + + expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original + + User.where(name: 'Bob').to_a + end +end diff --git a/spec/datadog/appsec/contrib/active_record/patcher_spec.rb b/spec/datadog/appsec/contrib/active_record/patcher_spec.rb deleted file mode 100644 index 468b9631a15..00000000000 --- a/spec/datadog/appsec/contrib/active_record/patcher_spec.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -require 'datadog/appsec/spec_helper' -require 'datadog/appsec/contrib/active_record/patcher' - -RSpec.describe Datadog::AppSec::Contrib::ActiveRecord::Patcher do - describe '#prepended_class_name' do - context 'when ActiveRecord version is 7.1 or higher' do - before do - stub_const( - '::ActiveRecord', - Module.new do - module_function def gem_version - Gem::Version.new('7.1') - end - end - ) - end - - it 'returns Instrumentation::InternalExecQueryAdapterPatch' do - expect(described_class.prepended_class_name(:postgresql)).to eq( - Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::InternalExecQueryAdapterPatch - ) - end - end - - context 'when ActiveRecord version is lower than 7.1' do - before do - stub_const( - '::ActiveRecord', - Module.new do - module_function def gem_version - Gem::Version.new('7.0') - end - end - ) - end - - context 'for postgresql adapter' do - context 'when ActiveRecord::ConnectionAdapters::JdbcAdapter is defined' do - before do - stub_const('::ActiveRecord::ConnectionAdapters::JdbcAdapter', Class.new) - end - - it 'returns Instrumentation::ExecQueryAdapterPatch' do - expect(described_class.prepended_class_name(:postgresql)).to eq( - Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecQueryAdapterPatch - ) - end - end - - it 'returns Instrumentation::ExecuteAndClearAdapterPatch' do - expect(described_class.prepended_class_name(:postgresql)).to eq( - Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecuteAndClearAdapterPatch - ) - end - end - - %i[mysql2 sqlite3].each do |adapter_name| - context "for #{adapter_name} adapter" do - it 'returns Instrumentation::ExecQueryAdapterPatch' do - expect(described_class.prepended_class_name(adapter_name)).to eq( - Datadog::AppSec::Contrib::ActiveRecord::Instrumentation::ExecQueryAdapterPatch - ) - end - end - end - end - end -end diff --git a/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb new file mode 100644 index 00000000000..5ebd8e0cacb --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/postgresql_adapter_spec.rb @@ -0,0 +1,113 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +if PlatformHelpers.jruby? + require 'activerecord-jdbc-adapter' +else + require 'pg' +end + +RSpec.describe 'AppSec ActiveRecord integration for Postgresql adapter' do + let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } + let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } + let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } + let(:context) { processor.new_context } + + let(:span) { Datadog::Tracing::SpanOperation.new('root') } + let(:trace) { Datadog::Tracing::TraceOperation.new } + + let!(:user_class) do + stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| + klass.establish_connection(db_config) + + klass.connection.create_table 'users', force: :cascade do |t| + t.string :name, null: false + t.string :email, null: false + t.timestamps + end + + # prevent internal sql requests from showing up + klass.count + klass.first + end + end + + let(:db_config) do + { + adapter: 'postgresql', + database: ENV.fetch('TEST_POSTGRES_DB', 'postgres'), + host: ENV.fetch('TEST_POSTGRES_HOST', '127.0.0.1'), + port: ENV.fetch('TEST_POSTGRES_PORT', 5432), + username: ENV.fetch('TEST_POSTGRES_USER', 'postgres'), + password: ENV.fetch('TEST_POSTGRES_PASSWORD', 'postgres') + } + end + + before do + Datadog.configure do |c| + c.appsec.enabled = true + c.appsec.instrument :active_record + end + + Datadog::AppSec::Scope.activate_scope(trace, span, processor) + + raise_on_rails_deprecation! + end + + after do + Datadog.configuration.reset! + + Datadog::AppSec::Scope.deactivate_scope + processor.finalize + end + + it 'calls waf with correct arguments when querying using .where' do + expected_db_statement = if PlatformHelpers.jruby? + 'SELECT "users".* FROM "users" WHERE "users"."name" = ?' + else + 'SELECT "users".* FROM "users" WHERE "users"."name" = $1' + end + + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => expected_db_statement, + 'server.db.system' => 'postgresql' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.where(name: 'Bob').to_a + end + + it 'calls waf with correct arguments when querying using .find_by_sql' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'", + 'server.db.system' => 'postgresql' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a + end + + it 'adds an event to processor context if waf status is :match' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {})) + ) + + expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original + + User.where(name: 'Bob').to_a + end +end diff --git a/spec/datadog/appsec/contrib/active_record/sqlite3_adapter_spec.rb b/spec/datadog/appsec/contrib/active_record/sqlite3_adapter_spec.rb new file mode 100644 index 00000000000..778fe952a30 --- /dev/null +++ b/spec/datadog/appsec/contrib/active_record/sqlite3_adapter_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require 'datadog/appsec/spec_helper' +require 'active_record' + +require 'spec/datadog/tracing/contrib/rails/support/deprecation' + +if PlatformHelpers.jruby? + require 'activerecord-jdbc-adapter' +else + require 'sqlite3' +end + +RSpec.describe 'AppSec ActiveRecord integration for SQLite3 adapter' do + let(:telemetry) { instance_double(Datadog::Core::Telemetry::Component) } + let(:ruleset) { Datadog::AppSec::Processor::RuleLoader.load_rules(ruleset: :recommended, telemetry: telemetry) } + let(:processor) { Datadog::AppSec::Processor.new(ruleset: ruleset, telemetry: telemetry) } + let(:context) { processor.new_context } + + let(:span) { Datadog::Tracing::SpanOperation.new('root') } + let(:trace) { Datadog::Tracing::TraceOperation.new } + + let!(:user_class) do + stub_const('User', Class.new(ActiveRecord::Base)).tap do |klass| + klass.establish_connection(db_config) + + klass.connection.create_table 'users', force: :cascade do |t| + t.string :name, null: false + t.string :email, null: false + t.timestamps + end + + # prevent internal sql requests from showing up + klass.count + klass.first + end + end + + let(:db_config) do + { + adapter: 'sqlite3', + database: ':memory:' + } + end + + before do + Datadog.configure do |c| + c.appsec.enabled = true + c.appsec.instrument :active_record + end + + Datadog::AppSec::Scope.activate_scope(trace, span, processor) + + raise_on_rails_deprecation! + end + + after do + Datadog.configuration.reset! + + Datadog::AppSec::Scope.deactivate_scope + processor.finalize + end + + it 'calls waf with correct arguments when querying using .where' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => 'SELECT "users".* FROM "users" WHERE "users"."name" = ?', + 'server.db.system' => 'sqlite' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.where(name: 'Bob').to_a + end + + it 'calls waf with correct arguments when querying using .find_by_sql' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).with( + {}, + { + 'server.db.statement' => "SELECT * FROM users WHERE name = 'Bob'", + 'server.db.system' => 'sqlite' + }, + Datadog.configuration.appsec.waf_timeout + ).and_call_original + ) + + User.find_by_sql("SELECT * FROM users WHERE name = 'Bob'").to_a + end + + it 'adds an event to processor context if waf status is :match' do + expect(Datadog::AppSec.active_scope.processor_context).to( + receive(:run).and_return(instance_double(Datadog::AppSec::WAF::Result, status: :match, actions: {})) + ) + + expect(Datadog::AppSec.active_scope.processor_context.events).to receive(:<<).and_call_original + + User.where(name: 'Bob').to_a + end +end From 971f329405bcb72670e77b6121933e22c9d6a651 Mon Sep 17 00:00:00 2001 From: Yury Lebedev Date: Mon, 2 Dec 2024 11:31:55 +0100 Subject: [PATCH 11/30] Move require of AppSec active_record instrumentation --- lib/datadog/appsec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/appsec.rb b/lib/datadog/appsec.rb index 7341af4c790..5f96dbdd5fa 100644 --- a/lib/datadog/appsec.rb +++ b/lib/datadog/appsec.rb @@ -53,10 +53,10 @@ def components end # Integrations -require_relative 'appsec/contrib/active_record/integration' require_relative 'appsec/contrib/rack/integration' require_relative 'appsec/contrib/sinatra/integration' require_relative 'appsec/contrib/rails/integration' +require_relative 'appsec/contrib/active_record/integration' require_relative 'appsec/contrib/devise/integration' require_relative 'appsec/contrib/graphql/integration' From 26a70ad47bbd92868f2f3e757fc9d1ed992a3551 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 2 Dec 2024 10:18:03 +0000 Subject: [PATCH 12/30] [PROF-10978] Require Ruby 3.1+ for heap profiling **What does this PR do?** This PR raises the minimum Ruby version required for heap profiling from the previous value of >= 2.7 to >= 3.1 due to a new VM bug discovered (see below for details). It's mostly a revert of #3366, where we had first tried to workaround a Ruby 2.7/3.0 bug, but it turns out we missed a spot, and we could trigger VM crashes because of that. **Motivation:** Ruby versions prior to 3.1 had a special optimization called `rb_gc_force_recycle` which would allow objects to directly be garbage collected (e.g. without needing to wait for the GC). It turns out that `rb_gc_force_recycle` did not play well with the changes in Ruby 2.7 to how object ids worked. We uncovered this earlier on during the development of the heap profiler, and put in a workaround for the bug that we thought was enough... Unfortunately, it turns out that the workaround is not enough. The following reproducer, when run on Ruby 2.7 or 3.0 shows how the Ruby VM can segfault inside `id2ref` due to the issue above: ```ruby puts RUBY_DESCRIPTION require "datadog" require "objspace" require "pry" NUM_OBJECTS = 10_000_000 recycled_ids = Array.new(NUM_OBJECTS) { 123 } many_objects = Array.new(NUM_OBJECTS) { Object.new } (0...NUM_OBJECTS).each do |i| recycled_ids[i] = many_objects[i].object_id end puts "Seeded objects!" gets (0...NUM_OBJECTS).each do |i| Datadog::Profiling::StackRecorder::Testing._native_gc_force_recycle(many_objects[i]) many_objects[i] = nil end puts GC.stat puts "Recycled objects!" gets many_objects = nil 10.times { GC.start } Array.new(10_000) { Object.new } 10.times { GC.start } puts GC.stat puts "GC'd objects! (Ruby should have released pages?)" gets recycled_ids.each { |i| begin (nil == ObjectSpace._id2ref(i)) rescue nil end } puts "Done!" ``` Crash details: ``` Program received signal SIGSEGV, Segmentation fault. is_swept_object (ptr=93825033355200, objspace=) at gc.c:3868 3868 return page->flags.before_sweep ? FALSE : TRUE; (gdb) bt #0 is_swept_object (ptr=93825033355200, objspace=) at gc.c:3868 #1 is_garbage_object (objspace=0x55555555d220, objspace=0x55555555d220, ptr=93825033355200) at gc.c:3887 #2 is_live_object (ptr=93825033355200, objspace=0x55555555d220) at gc.c:3909 #3 is_live_object (ptr=93825033355200, objspace=0x55555555d220) at gc.c:3898 #4 id2ref (objid=8264881) at gc.c:3999 #5 os_id2ref (os=, objid=) at gc.c:4019 ``` This crash happens because of two things: 1. Ruby does not clean the object id entry for a recycled object from its internal hash map 2. If the memory page where the object lived is returned back to the OS, trying to `id2ref` on that id will cause Ruby to try to read invalid memory and crash. **Additional Notes:** I've chosen to disable heap profiling on 2.7 and 3.0 because I can't think of a good workaround for the bug above, especially not one that does not increase the overhead of heap profiling. **How to test the change?** This PR updates the test coverage to expect Ruby 3.1+ as the minimum for the feature. You can also quickly validate it doesn't get enabled on the older Rubies using: ``` $ DD_PROFILING_ENABLED=true DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED=true bundle exec ddprofrb exec ruby -e "puts RUBY_DESCRIPTION" W, [2024-12-02T10:42:28.771611 #112585] WARN -- datadog: [datadog] Current Ruby version (3.0.5) cannot support heap profiling due to VM bugs/limitations. Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled. ``` --- .../extconf.rb | 8 -- .../heap_recorder.c | 96 +------------ .../stack_recorder.c | 34 ----- lib/datadog/profiling/component.rb | 20 +-- spec/datadog/profiling/component_spec.rb | 23 +--- spec/datadog/profiling/stack_recorder_spec.rb | 130 ------------------ 6 files changed, 14 insertions(+), 297 deletions(-) diff --git a/ext/datadog_profiling_native_extension/extconf.rb b/ext/datadog_profiling_native_extension/extconf.rb index 4acadd68215..bac21c65b69 100644 --- a/ext/datadog_profiling_native_extension/extconf.rb +++ b/ext/datadog_profiling_native_extension/extconf.rb @@ -170,11 +170,6 @@ def skip_building_extension!(reason) # On older Rubies, there was no jit_return member on the rb_control_frame_t struct $defs << "-DNO_JIT_RETURN" if RUBY_VERSION < "3.1" -# On older Rubies, rb_gc_force_recycle allowed to free objects in a way that -# would be invisible to free tracepoints, finalizers and without cleaning -# obj_to_id_tbl mappings. -$defs << "-DHAVE_WORKING_RB_GC_FORCE_RECYCLE" if RUBY_VERSION < "3.1" - # On older Rubies, there are no Ractors $defs << "-DNO_RACTORS" if RUBY_VERSION < "3" @@ -184,9 +179,6 @@ def skip_building_extension!(reason) # On older Rubies, objects would not move $defs << "-DNO_T_MOVED" if RUBY_VERSION < "2.7" -# On older Rubies, there was no RUBY_SEEN_OBJ_ID flag -$defs << "-DNO_SEEN_OBJ_ID_FLAG" if RUBY_VERSION < "2.7" - # On older Rubies, rb_global_vm_lock_struct did not include the owner field $defs << "-DNO_GVL_OWNER" if RUBY_VERSION < "2.6" diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index ea1c2ee5e4c..d186a1a0fba 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -7,10 +7,6 @@ #include "libdatadog_helpers.h" #include "time_helpers.h" -#if (defined(HAVE_WORKING_RB_GC_FORCE_RECYCLE) && ! defined(NO_SEEN_OBJ_ID_FLAG)) - #define CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND -#endif - // Minimum age (in GC generations) of heap objects we want to include in heap // recorder iterations. Object with age 0 represent objects that have yet to undergo // a GC and, thus, may just be noise/trash at instant of iteration and are usually not @@ -123,9 +119,6 @@ typedef struct { // Pointer to the (potentially partial) object_record containing metadata about an ongoing recording. // When NULL, this symbolizes an unstarted/invalid recording. object_record *object_record; - // A flag to track whether we had to force set the RUBY_FL_SEEN_OBJ_ID flag on this object - // as part of our workaround around rb_gc_force_recycle issues. - bool did_recycle_workaround; } recording; struct heap_recorder { @@ -342,46 +335,12 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); } - bool did_recycle_workaround = false; - - #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND - // If we are in a ruby version that has a working rb_gc_force_recycle implementation, - // its usage may lead to an object being re-used outside of the typical GC cycle. - // - // This re-use is in theory invisible to us unless we're lucky enough to sample both - // the original object and the replacement that uses the recycled slot. - // - // In practice, we've observed (https://github.com/DataDog/dd-trace-rb/pull/3366) - // that non-noop implementations of rb_gc_force_recycle have an implementation bug - // which results in the object that re-used the recycled slot inheriting the same - // object id without setting the FL_SEEN_OBJ_ID flag. We rely on this knowledge to - // "observe" implicit frees when an object we are tracking is force-recycled. - // - // However, it may happen that we start tracking a new object and that object was - // allocated on a recycled slot. Due to the bug, this object would be missing the - // FL_SEEN_OBJ_ID flag even though it was not recycled itself. If we left it be, - // when we're doing our liveness check, the absence of the flag would trigger our - // implicit free workaround and the object would be inferred as recycled even though - // it might still be alive. - // - // Thus, if we detect that this new allocation is already missing the flag at the start - // of the heap allocation recording, we force-set it. This should be safe since we - // just called rb_obj_id on it above and the expectation is that any flaggable object - // that goes through it ends up with the flag set (as evidenced by the GC_ASSERT - // lines in https://github.com/ruby/ruby/blob/4a8d7246d15b2054eacb20f8ab3d29d39a3e7856/gc.c#L4050C14-L4050C14). - if (RB_FL_ABLE(new_obj) && !RB_FL_TEST(new_obj, RUBY_FL_SEEN_OBJ_ID)) { - RB_FL_SET(new_obj, RUBY_FL_SEEN_OBJ_ID); - did_recycle_workaround = true; - } - #endif - heap_recorder->active_recording = (recording) { .object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) { .weight = weight * heap_recorder->sample_rate, .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL, .alloc_gen = rb_gc_count(), - }), - .did_recycle_workaround = did_recycle_workaround, + }), }; } @@ -685,41 +644,6 @@ static int st_object_record_update(st_data_t key, st_data_t value, st_data_t ext // If we got this far, then we found a valid live object for the tracked id. - #ifdef CAN_APPLY_GC_FORCE_RECYCLE_BUG_WORKAROUND - // If we are in a ruby version that has a working rb_gc_force_recycle implementation, - // its usage may lead to an object being re-used outside of the typical GC cycle. - // - // This re-use is in theory invisible to us and would mean that the ref from which we - // collected the object_record metadata may not be the same as the current ref and - // thus any further reporting would be innacurately attributed to stale metadata. - // - // In practice, there is a way for us to notice that this happened because of a bug - // in the implementation of rb_gc_force_recycle. Our heap profiler relies on object - // ids and id2ref to detect whether objects are still alive. Turns out that when an - // object with an id is re-used via rb_gc_force_recycle, it will "inherit" the ID - // of the old object but it will NOT have the FL_SEEN_OBJ_ID as per the experiment - // in https://github.com/DataDog/dd-trace-rb/pull/3360#discussion_r1442823517 - // - // Thus, if we detect that the ref we just resolved above is missing this flag, we can - // safely say re-use happened and thus treat it as an implicit free of the object - // we were tracking (the original one which got recycled). - if (RB_FL_ABLE(ref) && !RB_FL_TEST(ref, RUBY_FL_SEEN_OBJ_ID)) { - - // NOTE: We don't really need to set this flag for heap recorder to work correctly - // but doing so partially mitigates a bug in runtimes with working rb_gc_force_recycle - // which leads to broken invariants and leaking of entries in obj_to_id and id_to_obj - // tables in objspace. We already do the same thing when we sample a recycled object, - // here we apply it as well to objects that replace recycled objects that were being - // tracked. More details in https://github.com/DataDog/dd-trace-rb/pull/3366 - RB_FL_SET(ref, RUBY_FL_SEEN_OBJ_ID); - - on_committed_object_record_cleanup(recorder, record); - recorder->stats_last_update.objects_dead++; - return ST_DELETE; - } - - #endif - if ( recorder->size_enabled && recorder->update_include_old && // We only update sizes when doing a full update @@ -803,18 +727,12 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t * object_record *new_object_record = recording.object_record; if (existing) { object_record *existing_record = (object_record*) (*value); - if (recording.did_recycle_workaround) { - // In this case, it's possible for an object id to be re-used and we were lucky enough to have - // sampled both the original object and the replacement so cleanup the old one and replace it with - // the new object_record (i.e. treat this as a combined free+allocation). - on_committed_object_record_cleanup(update_data->heap_recorder, existing_record); - } else { - // This is not supposed to happen, raising... - VALUE existing_inspect = object_record_inspect(existing_record); - VALUE new_inspect = object_record_inspect(new_object_record); - rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " - "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect); - } + + // This is not supposed to happen, raising... + VALUE existing_inspect = object_record_inspect(existing_record); + VALUE new_inspect = object_record_inspect(new_object_record); + rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " + "the same id. previous=%"PRIsVALUE" new=%"PRIsVALUE, existing_inspect, new_inspect); } // Always carry on with the update, we want the new record to be there at the end (*value) = (st_data_t) new_object_record; diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index 31165ba8b3b..710b17356e2 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -258,8 +258,6 @@ static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locatio static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); -static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj); -static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj); static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance); static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns); static VALUE _native_is_object_recorded(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE object_id); @@ -297,10 +295,6 @@ void stack_recorder_init(VALUE profiling_module) { _native_end_fake_slow_heap_serialization, 1); rb_define_singleton_method(testing_module, "_native_debug_heap_recorder", _native_debug_heap_recorder, 1); - rb_define_singleton_method(testing_module, "_native_gc_force_recycle", - _native_gc_force_recycle, 1); - rb_define_singleton_method(testing_module, "_native_has_seen_id_flag", - _native_has_seen_id_flag, 1); rb_define_singleton_method(testing_module, "_native_is_object_recorded?", _native_is_object_recorded, 2); rb_define_singleton_method(testing_module, "_native_heap_recorder_reset_last_update", _native_heap_recorder_reset_last_update, 1); rb_define_singleton_method(testing_module, "_native_recorder_after_gc_step", _native_recorder_after_gc_step, 1); @@ -1006,34 +1000,6 @@ static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recor return heap_recorder_testonly_debug(state->heap_recorder); } -#pragma GCC diagnostic push -// rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop. -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#pragma GCC diagnostic ignored "-Wunused-parameter" -// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. -// It SHOULD NOT be used for other purposes. -static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) { - #ifdef HAVE_WORKING_RB_GC_FORCE_RECYCLE - rb_gc_force_recycle(obj); - #endif - return Qnil; -} -#pragma GCC diagnostic pop - -// This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. -// It SHOULD NOT be used for other purposes. -static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) { - #ifndef NO_SEEN_OBJ_ID_FLAG - if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) { - return Qtrue; - } else { - return Qfalse; - } - #else - return Qfalse; - #endif -} - static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 19239a70c78..e0bafc0cd3f 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -207,28 +207,16 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) return false unless heap_profiling_enabled - if RUBY_VERSION.start_with?("2.") && RUBY_VERSION < "2.7" + if RUBY_VERSION < "3.1" Datadog.logger.warn( - "Heap profiling currently relies on features introduced in Ruby 2.7 and will be forcibly disabled. " \ - "Please upgrade to Ruby >= 2.7 in order to use this feature." + "Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM bugs/limitations. " \ + "Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled." ) return false end - if RUBY_VERSION < "3.1" - Datadog.logger.debug( - "Current Ruby version (#{RUBY_VERSION}) supports forced object recycling which has a bug that the " \ - "heap profiler is forced to work around to remain accurate. This workaround requires force-setting " \ - "the SEEN_OBJ_ID flag on objects that should have it but don't. Full details can be found in " \ - "https://github.com/DataDog/dd-trace-rb/pull/3360. This workaround should be safe but can be " \ - "bypassed by disabling the heap profiler or upgrading to Ruby >= 3.1 where forced object recycling " \ - "was completely removed (https://bugs.ruby-lang.org/issues/18290)." - ) - end - unless allocation_profiling_enabled - raise ArgumentError, - "Heap profiling requires allocation profiling to be enabled" + raise ArgumentError, "Heap profiling requires allocation profiling to be enabled" end Datadog.logger.warn( diff --git a/spec/datadog/profiling/component_spec.rb b/spec/datadog/profiling/component_spec.rb index 573c76b8bdf..2320c482ce6 100644 --- a/spec/datadog/profiling/component_spec.rb +++ b/spec/datadog/profiling/component_spec.rb @@ -308,15 +308,15 @@ stub_const("RUBY_VERSION", testing_version) end - context "on a Ruby older than 2.7" do - let(:testing_version) { "2.6" } + context "on a Ruby older than 3.1" do + let(:testing_version) { "3.0" } it "initializes StackRecorder without heap sampling support and warns" do expect(Datadog::Profiling::StackRecorder).to receive(:new) .with(hash_including(heap_samples_enabled: false, heap_size_enabled: false)) .and_call_original - expect(Datadog.logger).to receive(:warn).with(/upgrade to Ruby >= 2.7/) + expect(Datadog.logger).to receive(:warn).with(/upgrade to Ruby >= 3.1/) build_profiler_component end @@ -367,23 +367,6 @@ build_profiler_component end end - - context "on a Ruby older than 3.1" do - let(:testing_version) { "2.7" } - - it "initializes StackRecorder with heap sampling support but shows warning and debug messages" do - expect(Datadog::Profiling::StackRecorder).to receive(:new) - .with(hash_including(heap_samples_enabled: true)) - .and_call_original - - expect(Datadog.logger).to receive(:debug).with(/Enabled allocation profiling/) - expect(Datadog.logger).to receive(:warn).with(/experimental heap profiling/) - expect(Datadog.logger).to receive(:warn).with(/experimental heap size profiling/) - expect(Datadog.logger).to receive(:debug).with(/forced object recycling.+upgrading to Ruby >= 3.1/) - - build_profiler_component - end - end end end diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 9b2c644a192..9f1ab10e6a0 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -666,136 +666,6 @@ def sample_allocation(obj) end end - context "on Rubies supporting rb_gc_force_recycle" do - before do - skip "rb_gc_force_recycle is a no-op in current Ruby version" if RUBY_VERSION >= "3.1" - @recycled_sample_allocation_line = 0 - end - - def has_seen_id_flag(obj) - described_class::Testing._native_has_seen_id_flag(obj) - end - - # This method attempts to allocate an object on a recycled heap slot. - # - # Heap slot recycling was a troublesome feature that has been removed from Rubies >= 3.1 - # in which an object could be freed through a fast-path that bypassed a lot of runtime - # machinery such as finalizers or object id tracking and thus introduced a fair amount - # of buggy behaviour. Some of this buggy behaviour manifests when a recycled slot gets - # re-used by a new live object: the new live object id will be the same as the id of - # the object that was recycled, violating a core constraint of Ruby objects: object ids - # are unique and non-repeatable. - # - # Recycling an object slot is easy (accomplished by a rb_gc_force_recycle native method call). - # More difficult is allocating an object on a recycled slot. Ruby gives us no control on - # where to allocate an object so we have to play a probability game. This method attempts to - # maximize our chances of quickly getting an object in a recycled slot by: - # 1. Force recycling 1000 objects. - # 2. Repeatedly allocating 1000 objects and keeping references to them, thus preventing GC - # from reclaiming their slots. - # 3. Checking if any of the ids of the 1000 recycled objects now map to a live object. If - # that happens, then we know that live object was allocated on a recycled slot and we - # can return it. - def create_obj_in_recycled_slot(should_sample_original: false) - # Force-recycle 1000 objects. - # NOTE: In theory, a single force recycle would suffice but the more recycled slots - # there are to use the more probable it is for a new allocation to use it. - recycled_obj_ids = [] - 1000.times do - obj = Object.new - sample_allocation(obj) if should_sample_original - @recycled_sample_allocation_line = __LINE__ - 1 - - # Get the id of the object we're about to recycle - recycled_obj_ids << obj.object_id - - # Force recycle the given object - described_class::Testing._native_gc_force_recycle(obj) - end - - # Repeatedly allocate objects until we find one that resolves to the id of one of - # the force recycled objects - objs = [] - 100.times do - # Instead of doing this one at a time which would be slow given id2ref will - # raise on failure, allocate a ton of objects each time, increasing the - # probability of getting a hit on each iteration - # NOTE: We keep the object references around to prevent GCs from constantly - # freeing up slots from the previous iteration. Thus each consecutive - # iteration should get one step closer to re-using one of the recycled - # slots. This should not lead to OOMs since we know there are 1000 - # free recycled slots available (we recycled them above). At the very - # limit we'd expect the Ruby VM to prefer to re-use those slots rather - # than expand heap pages and when that happens we'd stop iterating. - 1000.times { objs << Object.new } - recycled_obj_ids.each do |obj_id| - return ObjectSpace._id2ref(obj_id) - rescue RangeError # rubocop:disable Lint/SuppressedException - end - end - raise "could not allocate an object in a recycled slot" - end - - it "enforces seen id flag on objects on recycled slots that get sampled" do - recycled_obj = create_obj_in_recycled_slot - - expect(has_seen_id_flag(recycled_obj)).to be false - - sample_allocation(recycled_obj) - - expect(has_seen_id_flag(recycled_obj)).to be true - end - - it "enforces seen id flag on untracked objects that replace tracked recycled objects" do - recycled_obj = create_obj_in_recycled_slot(should_sample_original: true) - - expect(has_seen_id_flag(recycled_obj)).to be false - - serialize - - expect(has_seen_id_flag(recycled_obj)).to be true - end - - it "correctly handles lifecycle of objects on recycled slots that get sampled" do - recycled_obj = create_obj_in_recycled_slot - - sample_allocation(recycled_obj) - sample_line = __LINE__ - 1 - - GC.start # Ensure recycled sample has age > 0 so it shows up in serialized profile - - recycled_sample = heap_samples.find { |s| s.has_location?(path: __FILE__, line: sample_line) } - expect(recycled_sample).not_to be nil - end - - it "supports allocation samples with duplicate ids due to force recycling" do - recycled_obj = create_obj_in_recycled_slot(should_sample_original: true) - - expect { sample_allocation(recycled_obj) }.not_to raise_error - end - - it "raises on allocation samples with duplicate ids that are not due to force recycling" do - obj = Object.new - - sample_allocation(obj) - - expect { sample_allocation(obj) }.to raise_error(/supposed to be unique/) - end - - it "can detect implicit frees due to slot recycling" do - live_objects = [] - live_objects << create_obj_in_recycled_slot(should_sample_original: true) - - # If we act on implicit frees, then we assume that even though there's a live object - # in the same slot as the original one we were tracking, we'll be able to detect this - # recycling, clean up that record and not include it in the final heap samples - relevant_sample = heap_samples.find do |s| - s.has_location?(path: __FILE__, line: @recycled_sample_allocation_line) - end - expect(relevant_sample).to be nil - end - end - # NOTE: This is a regression test that exceptions in end_heap_allocation_recording_with_rb_protect are safely # handled by the stack_recorder. context "when the heap sampler raises an exception during _native_sample" do From d760e052a723f450165648fceed1c76cbb8cb574 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Mon, 2 Dec 2024 12:42:13 +0000 Subject: [PATCH 13/30] Minor: Tweak text in error message --- lib/datadog/profiling/component.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index e0bafc0cd3f..86f8fb798df 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -209,7 +209,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) if RUBY_VERSION < "3.1" Datadog.logger.warn( - "Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM bugs/limitations. " \ + "Current Ruby version (#{RUBY_VERSION}) cannot support heap profiling due to VM limitations. " \ "Please upgrade to Ruby >= 3.1 in order to use this feature. Heap profiling has been disabled." ) return false From ea14aee0fdc4beb572706b7b378fe8e6a9029b0c Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 18 Nov 2024 10:02:02 +0100 Subject: [PATCH 14/30] Github actions scaffold --- .github/workflows/test.yml | 104 ++++++++++++++++++++++++++++++ .vscode/settings.json | 1 + tasks/github.rake | 126 +++++++++++++++++++++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 .github/workflows/test.yml create mode 100644 tasks/github.rake diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000000..59266d518b3 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,104 @@ +# Please do NOT manually edit this file. +# This file is generated by 'bundle exec rake github:actions:test_template' +--- +name: Test +'on': +- push +jobs: + compute_tasks: + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + engine: + - name: ruby + version: '3.3' + alias: ruby-33 + - name: ruby + version: '3.2' + alias: ruby-32 + - name: ruby + version: '3.1' + alias: ruby-31 + - name: ruby + version: '3.0' + alias: ruby-30 + container: + image: ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }} + outputs: + ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}" + ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}" + ruby-31-matrix: "${{ steps.set-matrix.outputs.ruby-31 }}" + ruby-30-matrix: "${{ steps.set-matrix.outputs.ruby-30 }}" + steps: + - uses: actions/checkout@v4 + - run: apt update && apt install jq -y + - run: bundle install + - id: set-matrix + run: | + matrix_json=$(bundle exec rake github:generate_matrix) + # Debug output + echo "Generated JSON:" + echo "$matrix_json" + # Set the output + echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT + test-ruby-33: + name: Test on ruby 3.3 + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-33-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.3 + steps: + - uses: actions/checkout@v4 + - run: bundle install + - run: bundle exec rake test:${{ matrix.task }} + test-ruby-32: + name: Test on ruby 3.2 + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-32-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.2 + steps: + - uses: actions/checkout@v4 + - run: bundle install + - run: bundle exec rake test:${{ matrix.task }} + test-ruby-31: + name: Test on ruby 3.1 + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-31-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.1 + steps: + - uses: actions/checkout@v4 + - run: bundle install + - run: bundle exec rake test:${{ matrix.task }} + test-ruby-30: + name: Test on ruby 3.0 + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-30-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.0 + steps: + - uses: actions/checkout@v4 + - run: bundle install + - run: bundle exec rake test:${{ matrix.task }} diff --git a/.vscode/settings.json b/.vscode/settings.json index 605e69ae1d0..81ce33a596e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,7 @@ { "files.associations": { "*.gemfile": "ruby", + "Matrixfile": "ruby", "Dockerfile*": "dockerfile" } } diff --git a/tasks/github.rake b/tasks/github.rake new file mode 100644 index 00000000000..8f67a38c39f --- /dev/null +++ b/tasks/github.rake @@ -0,0 +1,126 @@ +require 'json' +require "psych" +# rubocop:disable Metrics/BlockLength +namespace :github do + namespace :actions do + task :test_template do |t| + runtimes = [ + "ruby:3.3", + "ruby:3.2", + "ruby:3.1", + "ruby:3.0", + ].map do |runtime| + engine, version = runtime.split(':') + runtime_alias = "#{engine}-#{version.gsub('.', '')}" + + OpenStruct.new( + "engine" => engine, + "version" => version, + "alias" => runtime_alias, + "image" => "ghcr.io/datadog/images-rb/engines/#{engine}:#{version}" + ) + end + + test_jobs = runtimes.map do |runtime| + { + "test-#{runtime.alias}" => { + "name" => "Test on #{runtime.engine} #{runtime.version}", + "needs" => ["compute_tasks"], + "runs-on" => "ubuntu-22.04", + "strategy" => { + "fail-fast" => false, + "matrix" => { + "include" => "${{ fromJson(needs.compute_tasks.outputs.#{runtime.alias}-matrix) }}" + } + }, + "container" => { "image" => runtime.image }, + "steps" => [ + { "uses" => "actions/checkout@v4" }, + { "run" => "bundle install" }, + { "run" => "bundle exec rake test:${{ matrix.task }}" } + ] + } + } + end + + compute_tasks = { + "runs-on" => "ubuntu-22.04", + "strategy" => { + "fail-fast" => false, + "matrix" => { + "engine" => runtimes.map do |runtime| + { "name" => runtime.engine, "version" => runtime.version, "alias" => runtime.alias } + end + } + }, + "container" =>{ + "image" => "ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }}" + }, + "outputs" => runtimes.each_with_object({}) do |runtime, hash| + hash["#{runtime.alias}-matrix"] = "${{ steps.set-matrix.outputs.#{runtime.alias} }}" + end, + "steps" => [ + { "uses" => "actions/checkout@v4" }, + { "run" => "apt update && apt install jq -y" }, + { "run" => "bundle install" }, + { + "id" => "set-matrix", + "run" => <<~BASH + matrix_json=$(bundle exec rake github:generate_matrix) + # Debug output + echo "Generated JSON:" + echo "$matrix_json" + # Set the output + echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT + BASH + }, + ] + } + + base = { + "name" => 'Test', + "on" => ['push'], + "jobs" => { + "compute_tasks" => compute_tasks, + **test_jobs.reduce(&:merge) + } + } + + string = +"" + string << <<~EOS + # Please do NOT manually edit this file. + # This file is generated by 'bundle exec rake #{t.name}' + EOS + string << Psych.dump(base, line_width: 120) + + File.binwrite(".github/workflows/test.yml", string) + end + end + + task :generate_matrix do + matrix = eval(File.read('Matrixfile')).freeze # rubocop:disable Security/Eval + + matrix = matrix.slice("stripe") + + ruby_version = RUBY_VERSION[0..2] + major, minor, = Gem::Version.new(RUBY_ENGINE_VERSION).segments + ruby_runtime = "#{RUBY_ENGINE}-#{major}.#{minor}" + array = [] + matrix.each do |key, spec_metadata| + matched = spec_metadata.any? do |appraisal_group, rubies| + if RUBY_PLATFORM == 'java' + rubies.include?("✅ #{ruby_version}") && rubies.include?('✅ jruby') + else + rubies.include?("✅ #{ruby_version}") + end + end + + if matched + array << { task: key } + end + end + + puts JSON.pretty_generate(array) + end +end +# rubocop:enable Metrics/BlockLength From e6aac8fd0d00eea5a38e3fa1b3e4b5cb187a68d8 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 18 Nov 2024 20:02:07 +0100 Subject: [PATCH 15/30] Split spec tasks --- .github/workflows/test.yml | 20 ++++++++++++++++---- tasks/github.rake | 23 +++++++++++++++++------ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 59266d518b3..889b6c218c7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,7 +56,10 @@ jobs: steps: - uses: actions/checkout@v4 - run: bundle install - - run: bundle exec rake test:${{ matrix.task }} + - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-32: name: Test on ruby 3.2 needs: @@ -71,7 +74,10 @@ jobs: steps: - uses: actions/checkout@v4 - run: bundle install - - run: bundle exec rake test:${{ matrix.task }} + - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-31: name: Test on ruby 3.1 needs: @@ -86,7 +92,10 @@ jobs: steps: - uses: actions/checkout@v4 - run: bundle install - - run: bundle exec rake test:${{ matrix.task }} + - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-30: name: Test on ruby 3.0 needs: @@ -101,4 +110,7 @@ jobs: steps: - uses: actions/checkout@v4 - run: bundle install - - run: bundle exec rake test:${{ matrix.task }} + - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} diff --git a/tasks/github.rake b/tasks/github.rake index 8f67a38c39f..40546d93d75 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -1,5 +1,8 @@ require 'json' require "psych" +require 'ostruct' +require_relative 'appraisal_conversion' + # rubocop:disable Metrics/BlockLength namespace :github do namespace :actions do @@ -37,7 +40,11 @@ namespace :github do "steps" => [ { "uses" => "actions/checkout@v4" }, { "run" => "bundle install" }, - { "run" => "bundle exec rake test:${{ matrix.task }}" } + { + "name" => "${{ matrix.task }} spec with ${{ matrix.gemfile }}", + "env" => { "BUNDLE_GEMFILE" => "${{ matrix.gemfile }}" }, + "run" => "bundle install && bundle exec rake spec:${{ matrix.task }}" + } ] } } @@ -107,17 +114,21 @@ namespace :github do ruby_runtime = "#{RUBY_ENGINE}-#{major}.#{minor}" array = [] matrix.each do |key, spec_metadata| - matched = spec_metadata.any? do |appraisal_group, rubies| - if RUBY_PLATFORM == 'java' + spec_metadata.each do |group, rubies| + matched = if RUBY_PLATFORM == 'java' rubies.include?("✅ #{ruby_version}") && rubies.include?('✅ jruby') else rubies.include?("✅ #{ruby_version}") end - end - if matched - array << { task: key } + if matched + array << { + gemfile: AppraisalConversion.to_bundle_gemfile(group), + task: key + } + end end + end puts JSON.pretty_generate(array) From c3a941a448b2213ba86bc46c2543608c0b6b9b0b Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 18 Nov 2024 20:22:17 +0100 Subject: [PATCH 16/30] Upload/Download artifact with cache --- .github/workflows/test.yml | 46 +++++++++++++++++++++++++++----------- tasks/github.rake | 35 +++++++++++++++++++++++------ 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 889b6c218c7..e6e3a014525 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,7 @@ jobs: ruby-30-matrix: "${{ steps.set-matrix.outputs.ruby-30 }}" steps: - uses: actions/checkout@v4 - - run: apt update && apt install jq -y + - run: apt-get update && apt-get install jq -y - run: bundle install - id: set-matrix run: | @@ -42,8 +42,16 @@ jobs: echo "$matrix_json" # Set the output echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT + - run: bundle cache + - uses: actions/upload-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-${{ matrix.engine.alias }} + retention-days: 1 + path: | + Gemfile.lock + vendor/ test-ruby-33: - name: Test on ruby 3.3 + name: "${{ matrix.task }} (${{ matrix.group }})" needs: - compute_tasks runs-on: ubuntu-22.04 @@ -55,13 +63,16 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.3 steps: - uses: actions/checkout@v4 - - run: bundle install - - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-33 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-32: - name: Test on ruby 3.2 + name: "${{ matrix.task }} (${{ matrix.group }})" needs: - compute_tasks runs-on: ubuntu-22.04 @@ -73,13 +84,16 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.2 steps: - uses: actions/checkout@v4 - - run: bundle install - - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-32 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-31: - name: Test on ruby 3.1 + name: "${{ matrix.task }} (${{ matrix.group }})" needs: - compute_tasks runs-on: ubuntu-22.04 @@ -91,13 +105,16 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.1 steps: - uses: actions/checkout@v4 - - run: bundle install - - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-31 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} test-ruby-30: - name: Test on ruby 3.0 + name: "${{ matrix.task }} (${{ matrix.group }})" needs: - compute_tasks runs-on: ubuntu-22.04 @@ -109,8 +126,11 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.0 steps: - uses: actions/checkout@v4 - - run: bundle install - - name: "${{ matrix.task }} spec with ${{ matrix.gemfile }}" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-30 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} diff --git a/tasks/github.rake b/tasks/github.rake index 40546d93d75..e1890d0a876 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -7,6 +7,7 @@ require_relative 'appraisal_conversion' namespace :github do namespace :actions do task :test_template do |t| + ubuntu = "ubuntu-22.04" runtimes = [ "ruby:3.3", "ruby:3.2", @@ -27,9 +28,9 @@ namespace :github do test_jobs = runtimes.map do |runtime| { "test-#{runtime.alias}" => { - "name" => "Test on #{runtime.engine} #{runtime.version}", + "name" => "${{ matrix.task }} (${{ matrix.group }})", "needs" => ["compute_tasks"], - "runs-on" => "ubuntu-22.04", + "runs-on" => ubuntu, "strategy" => { "fail-fast" => false, "matrix" => { @@ -39,9 +40,15 @@ namespace :github do "container" => { "image" => runtime.image }, "steps" => [ { "uses" => "actions/checkout@v4" }, - { "run" => "bundle install" }, { - "name" => "${{ matrix.task }} spec with ${{ matrix.gemfile }}", + "uses" => "actions/download-artifact@v4", + "with" => { + "name" => "bundled-dependencies-${{ github.run_id }}-#{runtime.alias}", + } + }, + { "run" => "bundle install --local" }, + { + "name" => "Test ${{ matrix.task }} with ${{ matrix.gemfile }}", "env" => { "BUNDLE_GEMFILE" => "${{ matrix.gemfile }}" }, "run" => "bundle install && bundle exec rake spec:${{ matrix.task }}" } @@ -51,7 +58,7 @@ namespace :github do end compute_tasks = { - "runs-on" => "ubuntu-22.04", + "runs-on" => ubuntu, "strategy" => { "fail-fast" => false, "matrix" => { @@ -68,7 +75,7 @@ namespace :github do end, "steps" => [ { "uses" => "actions/checkout@v4" }, - { "run" => "apt update && apt install jq -y" }, + { "run" => "apt-get update && apt-get install jq -y" }, { "run" => "bundle install" }, { "id" => "set-matrix", @@ -81,6 +88,18 @@ namespace :github do echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT BASH }, + { "run" => "bundle cache" }, + { + "uses" => "actions/upload-artifact@v4", + "with" => { + "name" => "bundled-dependencies-${{ github.run_id }}-${{ matrix.engine.alias }}", + "retention-days" => 1, + "path" => <<~STRING + Gemfile.lock + vendor/ + STRING + } + }, ] } @@ -122,8 +141,10 @@ namespace :github do end if matched + gemfile = AppraisalConversion.to_bundle_gemfile(group) array << { - gemfile: AppraisalConversion.to_bundle_gemfile(group), + group: group, + gemfile: gemfile, task: key } end From 23486ee93ffee705a174c44dc6eb548c3480dfa5 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Mon, 18 Nov 2024 22:28:08 +0100 Subject: [PATCH 17/30] Add main --- .github/workflows/test.yml | 75 -------------------------------------- tasks/github.rake | 15 +++++--- 2 files changed, 10 insertions(+), 80 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e6e3a014525..321b79c8b66 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,22 +14,10 @@ jobs: - name: ruby version: '3.3' alias: ruby-33 - - name: ruby - version: '3.2' - alias: ruby-32 - - name: ruby - version: '3.1' - alias: ruby-31 - - name: ruby - version: '3.0' - alias: ruby-30 container: image: ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }} outputs: ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}" - ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}" - ruby-31-matrix: "${{ steps.set-matrix.outputs.ruby-31 }}" - ruby-30-matrix: "${{ steps.set-matrix.outputs.ruby-30 }}" steps: - uses: actions/checkout@v4 - run: apt-get update && apt-get install jq -y @@ -71,66 +59,3 @@ jobs: env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} - test-ruby-32: - name: "${{ matrix.task }} (${{ matrix.group }})" - needs: - - compute_tasks - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - include: "${{ fromJson(needs.compute_tasks.outputs.ruby-32-matrix) }}" - container: - image: ghcr.io/datadog/images-rb/engines/ruby:3.2 - steps: - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - with: - name: bundled-dependencies-${{ github.run_id }}-ruby-32 - - run: bundle install --local - - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} - env: - BUNDLE_GEMFILE: "${{ matrix.gemfile }}" - run: bundle install && bundle exec rake spec:${{ matrix.task }} - test-ruby-31: - name: "${{ matrix.task }} (${{ matrix.group }})" - needs: - - compute_tasks - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - include: "${{ fromJson(needs.compute_tasks.outputs.ruby-31-matrix) }}" - container: - image: ghcr.io/datadog/images-rb/engines/ruby:3.1 - steps: - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - with: - name: bundled-dependencies-${{ github.run_id }}-ruby-31 - - run: bundle install --local - - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} - env: - BUNDLE_GEMFILE: "${{ matrix.gemfile }}" - run: bundle install && bundle exec rake spec:${{ matrix.task }} - test-ruby-30: - name: "${{ matrix.task }} (${{ matrix.group }})" - needs: - - compute_tasks - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - include: "${{ fromJson(needs.compute_tasks.outputs.ruby-30-matrix) }}" - container: - image: ghcr.io/datadog/images-rb/engines/ruby:3.0 - steps: - - uses: actions/checkout@v4 - - uses: actions/download-artifact@v4 - with: - name: bundled-dependencies-${{ github.run_id }}-ruby-30 - - run: bundle install --local - - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} - env: - BUNDLE_GEMFILE: "${{ matrix.gemfile }}" - run: bundle install && bundle exec rake spec:${{ matrix.task }} diff --git a/tasks/github.rake b/tasks/github.rake index e1890d0a876..fb2904f6ade 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -10,9 +10,9 @@ namespace :github do ubuntu = "ubuntu-22.04" runtimes = [ "ruby:3.3", - "ruby:3.2", - "ruby:3.1", - "ruby:3.0", + # "ruby:3.2", + # "ruby:3.1", + # "ruby:3.0", ].map do |runtime| engine, version = runtime.split(':') runtime_alias = "#{engine}-#{version.gsub('.', '')}" @@ -126,7 +126,11 @@ namespace :github do task :generate_matrix do matrix = eval(File.read('Matrixfile')).freeze # rubocop:disable Security/Eval - matrix = matrix.slice("stripe") + candidates = [ + 'main', + 'stripe' + ] + matrix = matrix.slice(*candidates) ruby_version = RUBY_VERSION[0..2] major, minor, = Gem::Version.new(RUBY_ENGINE_VERSION).segments @@ -141,7 +145,8 @@ namespace :github do end if matched - gemfile = AppraisalConversion.to_bundle_gemfile(group) + gemfile = AppraisalConversion.to_bundle_gemfile(group) rescue "Gemfile" + array << { group: group, gemfile: gemfile, From e97b272b1ded46f2ecb9aaa2ea90e67a8baa32b3 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 00:55:04 +0100 Subject: [PATCH 18/30] Download jq with curl --- .github/workflows/test.yml | 4 +++- tasks/github.rake | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 321b79c8b66..4c4f20d3ba4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,7 +20,9 @@ jobs: ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}" steps: - uses: actions/checkout@v4 - - run: apt-get update && apt-get install jq -y + - run: | + curl -L --retry 3 -f --retry-all-errors --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 + chmod +x /usr/local/bin/jq - run: bundle install - id: set-matrix run: | diff --git a/tasks/github.rake b/tasks/github.rake index fb2904f6ade..95aef290eb8 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -75,7 +75,12 @@ namespace :github do end, "steps" => [ { "uses" => "actions/checkout@v4" }, - { "run" => "apt-get update && apt-get install jq -y" }, + { + "run" => <<~BASH + curl -L --retry 3 -f --retry-all-errors --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 + chmod +x /usr/local/bin/jq + BASH + }, { "run" => "bundle install" }, { "id" => "set-matrix", From 13ba48a31f4e9f2a0b3131663c42ea94ab3b39b7 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 00:55:36 +0100 Subject: [PATCH 19/30] Configure Git --- .github/workflows/test.yml | 2 ++ tasks/github.rake | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4c4f20d3ba4..bc5d6aafdbd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,6 +53,8 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.3 steps: - uses: actions/checkout@v4 + - name: Configure Git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - uses: actions/download-artifact@v4 with: name: bundled-dependencies-${{ github.run_id }}-ruby-33 diff --git a/tasks/github.rake b/tasks/github.rake index 95aef290eb8..c73773798ee 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -40,6 +40,10 @@ namespace :github do "container" => { "image" => runtime.image }, "steps" => [ { "uses" => "actions/checkout@v4" }, + { + "name" => "Configure Git", + "run" => 'git config --global --add safe.directory "$GITHUB_WORKSPACE"' + }, { "uses" => "actions/download-artifact@v4", "with" => { From b49e0e2bc49ed5daf63dc10d9b513a089749ee49 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 01:11:18 +0100 Subject: [PATCH 20/30] Improve job name --- .github/workflows/test.yml | 2 +- tasks/github.rake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bc5d6aafdbd..c6564dbcf72 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -41,7 +41,7 @@ jobs: Gemfile.lock vendor/ test-ruby-33: - name: "${{ matrix.task }} (${{ matrix.group }})" + name: 'ruby-3.3: ${{ matrix.task }} (${{ matrix.group }})' needs: - compute_tasks runs-on: ubuntu-22.04 diff --git a/tasks/github.rake b/tasks/github.rake index c73773798ee..21b9ae0cc39 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -28,7 +28,7 @@ namespace :github do test_jobs = runtimes.map do |runtime| { "test-#{runtime.alias}" => { - "name" => "${{ matrix.task }} (${{ matrix.group }})", + "name" => "#{runtime.engine}-#{runtime.version}: ${{ matrix.task }} (${{ matrix.group }})", "needs" => ["compute_tasks"], "runs-on" => ubuntu, "strategy" => { From 7a1e6f68ee0e5ebf98c3958779f1deab4674e3c9 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 01:21:29 +0100 Subject: [PATCH 21/30] Add pg --- .github/workflows/test.yml | 12 ++++++++++++ tasks/github.rake | 22 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c6564dbcf72..57fd0270335 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,6 +51,18 @@ jobs: include: "${{ fromJson(needs.compute_tasks.outputs.ruby-33-matrix) }}" container: image: ghcr.io/datadog/images-rb/engines/ruby:3.3 + env: + TEST_POSTGRES_HOST: postgres + services: + postgres: + image: postgres:9.6 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + env: + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_DB: postgres steps: - uses: actions/checkout@v4 - name: Configure Git diff --git a/tasks/github.rake b/tasks/github.rake index 21b9ae0cc39..1a6e7f961a9 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -37,7 +37,26 @@ namespace :github do "include" => "${{ fromJson(needs.compute_tasks.outputs.#{runtime.alias}-matrix) }}" } }, - "container" => { "image" => runtime.image }, + "container" => { + "image" => runtime.image, + "env" => { + "TEST_POSTGRES_HOST" => "postgres", + } + }, + "services" => { + "postgres" => { + "image" => "postgres:9.6", + "credentials" => { + "username" => '${{ secrets.DOCKERHUB_USERNAME }}', + "password" => '${{ secrets.DOCKERHUB_TOKEN }}' + }, + "env" => { + "POSTGRES_PASSWORD" => "postgres", + "POSTGRES_USER" => "postgres", + "POSTGRES_DB" => "postgres", + } + }, + }, "steps" => [ { "uses" => "actions/checkout@v4" }, { @@ -137,6 +156,7 @@ namespace :github do candidates = [ 'main', + 'pg', 'stripe' ] matrix = matrix.slice(*candidates) From 523742f9b0fe2420e7978cecdbd0512533d68209 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 02:23:18 +0100 Subject: [PATCH 22/30] Add redis --- .github/workflows/test.yml | 6 ++++++ tasks/github.rake | 37 +++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 57fd0270335..8b9daf4374b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,6 +53,7 @@ jobs: image: ghcr.io/datadog/images-rb/engines/ruby:3.3 env: TEST_POSTGRES_HOST: postgres + TEST_REDIS_HOST: redis services: postgres: image: postgres:9.6 @@ -63,6 +64,11 @@ jobs: POSTGRES_PASSWORD: postgres POSTGRES_USER: postgres POSTGRES_DB: postgres + redis: + image: redis:6.2 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" steps: - uses: actions/checkout@v4 - name: Configure Git diff --git a/tasks/github.rake b/tasks/github.rake index 1a6e7f961a9..cfed86944e4 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -8,6 +8,27 @@ namespace :github do namespace :actions do task :test_template do |t| ubuntu = "ubuntu-22.04" + + docker_login_credentials = { + "username" => '${{ secrets.DOCKERHUB_USERNAME }}', + "password" => '${{ secrets.DOCKERHUB_TOKEN }}' + } + + postgres = { + "image" => "postgres:9.6", + "credentials" => docker_login_credentials.dup, + "env" => { + "POSTGRES_PASSWORD" => "postgres", + "POSTGRES_USER" => "postgres", + "POSTGRES_DB" => "postgres", + } + } + + redis = { + "image" => "redis:6.2", + "credentials" => docker_login_credentials.dup, + } + runtimes = [ "ruby:3.3", # "ruby:3.2", @@ -41,21 +62,12 @@ namespace :github do "image" => runtime.image, "env" => { "TEST_POSTGRES_HOST" => "postgres", + "TEST_REDIS_HOST" => "redis", } }, "services" => { - "postgres" => { - "image" => "postgres:9.6", - "credentials" => { - "username" => '${{ secrets.DOCKERHUB_USERNAME }}', - "password" => '${{ secrets.DOCKERHUB_TOKEN }}' - }, - "env" => { - "POSTGRES_PASSWORD" => "postgres", - "POSTGRES_USER" => "postgres", - "POSTGRES_DB" => "postgres", - } - }, + "postgres" => postgres, + "redis" => redis }, "steps" => [ { "uses" => "actions/checkout@v4" }, @@ -157,6 +169,7 @@ namespace :github do candidates = [ 'main', 'pg', + 'redis', 'stripe' ] matrix = matrix.slice(*candidates) From 58e3444046ab40b9efcd99bfac23611c17c17e7d Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 10:12:10 +0100 Subject: [PATCH 23/30] Add rack --- tasks/github.rake | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tasks/github.rake b/tasks/github.rake index cfed86944e4..21adca8f18f 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -169,9 +169,17 @@ namespace :github do candidates = [ 'main', 'pg', + 'rack', 'redis', 'stripe' ] + + remainders = matrix.keys - candidates + + if remainders.empty? + raise "No remainder found. Use the matrix directly (without candidate filtering)." + end + matrix = matrix.slice(*candidates) ruby_version = RUBY_VERSION[0..2] From 8eda368d7c8014ebde2f80f0d84c5671edf20647 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 10:37:18 +0100 Subject: [PATCH 24/30] Concurrency --- .github/workflows/test.yml | 3 +++ tasks/github.rake | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8b9daf4374b..20c653f3aea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,9 @@ name: Test 'on': - push +concurrency: + group: "${{ github.workflow }}-${{ github.ref }}" + cancel-in-progress: "${{ github.ref != 'refs/heads/master' }}" jobs: compute_tasks: runs-on: ubuntu-22.04 diff --git a/tasks/github.rake b/tasks/github.rake index 21adca8f18f..167cd176383 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -146,6 +146,10 @@ namespace :github do base = { "name" => 'Test', "on" => ['push'], + "concurrency" => { + "group" => '${{ github.workflow }}-${{ github.ref }}', + "cancel-in-progress" => '${{ github.ref != \'refs/heads/master\' }}' + }, "jobs" => { "compute_tasks" => compute_tasks, **test_jobs.reduce(&:merge) From 542a89ae1190bcb0d815e33d299b65ed783b715b Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 10:44:37 +0100 Subject: [PATCH 25/30] Add sinatra --- tasks/github.rake | 1 + 1 file changed, 1 insertion(+) diff --git a/tasks/github.rake b/tasks/github.rake index 167cd176383..8bd912f21d2 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -175,6 +175,7 @@ namespace :github do 'pg', 'rack', 'redis', + 'sinatra', 'stripe' ] From d64b70c2073e872200f20ad4a1b1041c0065dec2 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 15:06:39 +0100 Subject: [PATCH 26/30] Remove anchor in yaml --- .github/workflows/test.yml | 47 +++++++++++++++++++++++++++++++++++++- tasks/github.rake | 25 ++++++++++++++------ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 20c653f3aea..0a49e1a92f0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,14 +17,18 @@ jobs: - name: ruby version: '3.3' alias: ruby-33 + - name: ruby + version: '3.2' + alias: ruby-32 container: image: ghcr.io/datadog/images-rb/engines/${{ matrix.engine.name }}:${{ matrix.engine.version }} outputs: ruby-33-matrix: "${{ steps.set-matrix.outputs.ruby-33 }}" + ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}" steps: - uses: actions/checkout@v4 - run: | - curl -L --retry 3 -f --retry-all-errors --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 + curl -L --retry 3 -f --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 chmod +x /usr/local/bin/jq - run: bundle install - id: set-matrix @@ -84,3 +88,44 @@ jobs: env: BUNDLE_GEMFILE: "${{ matrix.gemfile }}" run: bundle install && bundle exec rake spec:${{ matrix.task }} + test-ruby-32: + name: 'ruby-3.2: ${{ matrix.task }} (${{ matrix.group }})' + needs: + - compute_tasks + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + include: "${{ fromJson(needs.compute_tasks.outputs.ruby-32-matrix) }}" + container: + image: ghcr.io/datadog/images-rb/engines/ruby:3.2 + env: + TEST_POSTGRES_HOST: postgres + TEST_REDIS_HOST: redis + services: + postgres: + image: postgres:9.6 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + env: + POSTGRES_PASSWORD: postgres + POSTGRES_USER: postgres + POSTGRES_DB: postgres + redis: + image: redis:6.2 + credentials: + username: "${{ secrets.DOCKERHUB_USERNAME }}" + password: "${{ secrets.DOCKERHUB_TOKEN }}" + steps: + - uses: actions/checkout@v4 + - name: Configure Git + run: git config --global --add safe.directory "$GITHUB_WORKSPACE" + - uses: actions/download-artifact@v4 + with: + name: bundled-dependencies-${{ github.run_id }}-ruby-32 + - run: bundle install --local + - name: Test ${{ matrix.task }} with ${{ matrix.gemfile }} + env: + BUNDLE_GEMFILE: "${{ matrix.gemfile }}" + run: bundle install && bundle exec rake spec:${{ matrix.task }} diff --git a/tasks/github.rake b/tasks/github.rake index 8bd912f21d2..928aaf9ee45 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -9,6 +9,7 @@ namespace :github do task :test_template do |t| ubuntu = "ubuntu-22.04" + # Still being rate limited docker_login_credentials = { "username" => '${{ secrets.DOCKERHUB_USERNAME }}', "password" => '${{ secrets.DOCKERHUB_TOKEN }}' @@ -16,7 +17,7 @@ namespace :github do postgres = { "image" => "postgres:9.6", - "credentials" => docker_login_credentials.dup, + "credentials" => docker_login_credentials, "env" => { "POSTGRES_PASSWORD" => "postgres", "POSTGRES_USER" => "postgres", @@ -26,14 +27,20 @@ namespace :github do redis = { "image" => "redis:6.2", - "credentials" => docker_login_credentials.dup, + "credentials" => docker_login_credentials, } runtimes = [ "ruby:3.3", - # "ruby:3.2", + "ruby:3.2", # "ruby:3.1", # "ruby:3.0", + # "ruby:2.7", + # "ruby:2.6", + # "ruby:2.5", + # "jruby:9.4", + # "jruby:9.3", + # "jruby:9.2", ].map do |runtime| engine, version = runtime.split(':') runtime_alias = "#{engine}-#{version.gsub('.', '')}" @@ -67,7 +74,7 @@ namespace :github do }, "services" => { "postgres" => postgres, - "redis" => redis + "redis" => redis, }, "steps" => [ { "uses" => "actions/checkout@v4" }, @@ -112,7 +119,7 @@ namespace :github do { "uses" => "actions/checkout@v4" }, { "run" => <<~BASH - curl -L --retry 3 -f --retry-all-errors --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 + curl -L --retry 3 -f --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 chmod +x /usr/local/bin/jq BASH }, @@ -156,13 +163,17 @@ namespace :github do } } + # `Psych.dump` directly creates anchors, but Github Actions does not support anchors for YAML, + # convert to JSON first to avoid anchors + json = JSON.dump(base) + yaml = Psych.safe_load(json) + string = +"" string << <<~EOS # Please do NOT manually edit this file. # This file is generated by 'bundle exec rake #{t.name}' EOS - string << Psych.dump(base, line_width: 120) - + string << Psych.dump(yaml, line_width: 120) File.binwrite(".github/workflows/test.yml", string) end end From a19783970fea365fe5417bc597f76de4de0c8f1c Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 19 Nov 2024 15:55:48 +0100 Subject: [PATCH 27/30] Remove jq --- .github/workflows/test.yml | 5 +---- tasks/github.rake | 11 ++--------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0a49e1a92f0..4048b3aa921 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,9 +27,6 @@ jobs: ruby-32-matrix: "${{ steps.set-matrix.outputs.ruby-32 }}" steps: - uses: actions/checkout@v4 - - run: | - curl -L --retry 3 -f --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 - chmod +x /usr/local/bin/jq - run: bundle install - id: set-matrix run: | @@ -38,7 +35,7 @@ jobs: echo "Generated JSON:" echo "$matrix_json" # Set the output - echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT + echo "${{ matrix.engine.alias }}=$(echo "$matrix_json")" >> $GITHUB_OUTPUT - run: bundle cache - uses: actions/upload-artifact@v4 with: diff --git a/tasks/github.rake b/tasks/github.rake index 928aaf9ee45..0ab0f8f5c5b 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -117,12 +117,6 @@ namespace :github do end, "steps" => [ { "uses" => "actions/checkout@v4" }, - { - "run" => <<~BASH - curl -L --retry 3 -f --retry-delay 1 -o /usr/local/bin/jq https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux64 - chmod +x /usr/local/bin/jq - BASH - }, { "run" => "bundle install" }, { "id" => "set-matrix", @@ -132,7 +126,7 @@ namespace :github do echo "Generated JSON:" echo "$matrix_json" # Set the output - echo "${{ matrix.engine.alias }}=$(echo "$matrix_json" | jq -c .)" >> $GITHUB_OUTPUT + echo "${{ matrix.engine.alias }}=$(echo "$matrix_json")" >> $GITHUB_OUTPUT BASH }, { "run" => "bundle cache" }, @@ -220,10 +214,9 @@ namespace :github do } end end - end - puts JSON.pretty_generate(array) + puts JSON.dump(array) end end # rubocop:enable Metrics/BlockLength From a40a8e996177bab65ddc9153c08ef6bffd08a67c Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 3 Dec 2024 11:06:55 +0100 Subject: [PATCH 28/30] Schedule --- .github/workflows/test.yml | 9 +++++++-- tasks/github.rake | 14 ++++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4048b3aa921..a17b1ddb2a8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,9 +1,14 @@ # Please do NOT manually edit this file. # This file is generated by 'bundle exec rake github:actions:test_template' --- -name: Test +name: Unit Tests 'on': -- push + push: + branches: + - master + - poc/** + schedule: + - cron: 0 7 * * * concurrency: group: "${{ github.workflow }}-${{ github.ref }}" cancel-in-progress: "${{ github.ref != 'refs/heads/master' }}" diff --git a/tasks/github.rake b/tasks/github.rake index 0ab0f8f5c5b..34a867e272b 100644 --- a/tasks/github.rake +++ b/tasks/github.rake @@ -145,8 +145,18 @@ namespace :github do } base = { - "name" => 'Test', - "on" => ['push'], + "name" => 'Unit Tests', + "on" => { + "push" => { + "branches" => [ + "master", + "poc/**", + ] + }, + "schedule" => [ + { "cron" => '0 7 * * *' } + ] + }, "concurrency" => { "group" => '${{ github.workflow }}-${{ github.ref }}', "cancel-in-progress" => '${{ github.ref != \'refs/heads/master\' }}' From 55fcec3dc85e13387637442a9341ac88583a6378 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 3 Dec 2024 13:09:44 +0100 Subject: [PATCH 29/30] Revert "Temporarily skipping for 2.7.1" This reverts commit e0617c738663d120ee4f00599561540b9f932cc1. --- spec/datadog/core/environment/execution_spec.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spec/datadog/core/environment/execution_spec.rb b/spec/datadog/core/environment/execution_spec.rb index 274be863444..ba709c90601 100644 --- a/spec/datadog/core/environment/execution_spec.rb +++ b/spec/datadog/core/environment/execution_spec.rb @@ -67,8 +67,7 @@ end context 'when in a Pry session' do - # Temporarily skipped for release 2.7.1 - xit 'returns true' do + it 'returns true' do Tempfile.create('test') do |f| f.write(repl_script) f.close From 0cf25e01d9ffa11f8230eae5c060d272b73af9a9 Mon Sep 17 00:00:00 2001 From: Tony Hsu Date: Tue, 3 Dec 2024 15:06:05 +0100 Subject: [PATCH 30/30] No requirement for benchmarks --- .gitlab/benchmarks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml index ec9a291a5c0..2ced6f01622 100644 --- a/.gitlab/benchmarks.yml +++ b/.gitlab/benchmarks.yml @@ -162,6 +162,7 @@ ddprof-benchmark: benchmarks: stage: benchmarks when: always + needs: [] tags: ["runner:apm-k8s-tweaked-metal"] image: $BASE_CI_IMAGE interruptible: true