Skip to content

Commit

Permalink
Merge pull request #3177 from DataDog/asm-compress-schema-information
Browse files Browse the repository at this point in the history
[APPSEC-10967] Compress and encode schema information
  • Loading branch information
GustavoCaso authored Oct 4, 2023
2 parents e1a3596 + 5b10018 commit de7400c
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 59 deletions.
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ if RUBY_PLATFORM != 'java'
end

group :check do
if RUBY_VERSION >= '2.7.0' && RUBY_PLATFORM != 'java'
gem 'rbs', '~> 3.1.0', require: false
if RUBY_VERSION >= '3.0.0' && RUBY_PLATFORM != 'java'
gem 'rbs', '~> 3.2.0', require: false
gem 'steep', '~> 1.4.0', require: false
end
end
Expand Down
1 change: 1 addition & 0 deletions Steepfile
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ target :ddtrace do
library 'securerandom'
library 'base64'
library 'digest'
library 'zlib'

repo_path 'vendor/rbs'
library 'cucumber'
Expand Down
156 changes: 102 additions & 54 deletions lib/datadog/appsec/event.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
require 'json'
require 'zlib'
require 'base64'

require_relative 'rate_limiter'

Expand Down Expand Up @@ -34,86 +36,132 @@ module Event
Content-Language
].map!(&:downcase).freeze

MAX_ENCODED_SCHEMA_SIZE = 25000
# For more information about this number
# please check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747221082
MIN_SCHEMA_SIZE_FOR_COMPRESSION = 260

# Record events for a trace
#
# This is expected to be called only once per trace for the rate limiter
# to properly apply
def self.record(span, *events)
# ensure rate limiter is called only when there are events to record
return if events.empty? || span.nil?
class << self
def record(span, *events)
# ensure rate limiter is called only when there are events to record
return if events.empty? || span.nil?

Datadog::AppSec::RateLimiter.limit(:traces) do
record_via_span(span, *events)
end
end

def self.record_via_span(span, *events)
events.group_by { |e| e[:trace] }.each do |trace, event_group|
unless trace
Datadog.logger.debug { "{ error: 'no trace: cannot record', event_group: #{event_group.inspect}}" }
next
Datadog::AppSec::RateLimiter.limit(:traces) do
record_via_span(span, *events)
end
end

trace.keep!
trace.set_tag(
Datadog::Tracing::Metadata::Ext::Distributed::TAG_DECISION_MAKER,
Datadog::Tracing::Sampling::Ext::Decision::ASM
)
def record_via_span(span, *events)
events.group_by { |e| e[:trace] }.each do |trace, event_group|
unless trace
Datadog.logger.debug { "{ error: 'no trace: cannot record', event_group: #{event_group.inspect}}" }
next
end

# prepare and gather tags to apply
service_entry_tags = build_service_entry_tags(event_group)
trace.keep!
trace.set_tag(
Datadog::Tracing::Metadata::Ext::Distributed::TAG_DECISION_MAKER,
Datadog::Tracing::Sampling::Ext::Decision::ASM
)

# complex types are unsupported, we need to serialize to a string
triggers = service_entry_tags.delete('_dd.appsec.triggers')
span.set_tag('_dd.appsec.json', JSON.dump({ triggers: triggers }))
# prepare and gather tags to apply
service_entry_tags = build_service_entry_tags(event_group)

# apply tags to service entry span
service_entry_tags.each do |key, value|
span.set_tag(key, value)
# apply tags to service entry span
service_entry_tags.each do |key, value|
span.set_tag(key, value)
end
end
end
end

def self.build_service_entry_tags(event_group)
event_group.each_with_object({}) do |event, tags|
# TODO: assume HTTP request context for now

if (request = event[:request])
request_headers = request.headers.select do |k, _|
ALLOWED_REQUEST_HEADERS.include?(k.downcase)
# rubocop:disable Metrics/MethodLength
def build_service_entry_tags(event_group)
waf_events = []
entry_tags = event_group.each_with_object({ '_dd.origin' => 'appsec' }) do |event, tags|
# TODO: assume HTTP request context for now
if (request = event[:request])
request.headers.each do |header, value|
tags["http.request.headers.#{header}"] = value if ALLOWED_REQUEST_HEADERS.include?(header.downcase)
end

tags['http.host'] = request.host
tags['http.useragent'] = request.user_agent
tags['network.client.ip'] = request.remote_addr
end

request_headers.each do |header, value|
tags["http.request.headers.#{header}"] = value
if (response = event[:response])
response.headers.each do |header, value|
tags["http.response.headers.#{header}"] = value if ALLOWED_RESPONSE_HEADERS.include?(header.downcase)
end
end

tags['http.host'] = request.host
tags['http.useragent'] = request.user_agent
tags['network.client.ip'] = request.remote_addr
end
waf_result = event[:waf_result]
# accumulate triggers
waf_events += waf_result.events

if (response = event[:response])
response_headers = response.headers.select do |k, _|
ALLOWED_RESPONSE_HEADERS.include?(k.downcase)
end
waf_result.derivatives.each do |key, value|
parsed_value = json_parse(value)
next unless parsed_value

parsed_value_size = parsed_value.size

schema_value = if parsed_value_size >= MIN_SCHEMA_SIZE_FOR_COMPRESSION
compressed_and_base64_encoded(parsed_value)
else
parsed_value
end
next unless schema_value

response_headers.each do |header, value|
tags["http.response.headers.#{header}"] = value
if schema_value.size >= MAX_ENCODED_SCHEMA_SIZE
Datadog.logger.debug do
"Schema key: #{key} exceeds the max size value. It will not be included as part of the span tags"
end
next
end

tags[key] = schema_value
end

tags
end

tags['_dd.origin'] = 'appsec'
appsec_events = json_parse({ triggers: waf_events })
entry_tags['_dd.appsec.json'] = appsec_events if appsec_events
entry_tags
end
# rubocop:enable Metrics/MethodLength

# accumulate triggers
waf_result = event[:waf_result]
tags['_dd.appsec.triggers'] ||= []
tags['_dd.appsec.triggers'] += waf_result.events
private

waf_result.derivatives.each do |key, value|
tags[key] = JSON.dump(value)
def compressed_and_base64_encoded(value)
Base64.encode64(gzip(value))
rescue TypeError => e
Datadog.logger.debug do
"Failed to compress and encode value when populating AppSec::Event. Error: #{e.message}"
end
nil
end

def json_parse(value)
JSON.dump(value)
rescue ArgumentError => e
Datadog.logger.debug do
"Failed to parse value to JSON when populating AppSec::Event. Error: #{e.message}"
end
nil
end

tags
def gzip(value)
sio = StringIO.new
# For an in depth comparison of Zlib options check https://github.com/DataDog/dd-trace-rb/pull/3177#issuecomment-1747215473
gz = Zlib::GzipWriter.new(sio, Zlib::BEST_SPEED, Zlib::DEFAULT_STRATEGY)
gz.write(value)
gz.close
sio.string
end
end
end
Expand Down
11 changes: 11 additions & 0 deletions sig/datadog/appsec/event.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,22 @@ module Datadog

ALLOWED_RESPONSE_HEADERS: untyped

MAX_ENCODED_SCHEMA_SIZE: Numeric
MIN_SCHEMA_SIZE_FOR_COMPRESSION: Numeric

def self.record: (Datadog::Tracing::SpanOperation, *untyped events) -> (nil | untyped)

def self.record_via_span: (Datadog::Tracing::SpanOperation, *untyped events) -> untyped

def self.build_service_entry_tags: (Array[Hash[::Symbol, untyped]] event_group) -> Hash[::String, untyped]

private

def self.compressed_and_base64_encoded: (untyped value) -> untyped

def self.json_parse: (untyped value) -> untyped

def self.gzip: (untyped value) -> untyped
end
end
end
31 changes: 28 additions & 3 deletions spec/datadog/appsec/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,34 @@
}
end

it 'adds derivatives to the top level span meta' do
meta = top_level_span.meta
expect(meta['_dd.appsec.s.req.headers']).to eq JSON.dump([{ 'host' => [8], 'version' => [8] }])
context 'JSON payload' do
it 'uses JSON string when do not exceeds MIN_SCHEMA_SIZE_FOR_COMPRESSION' do
stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 3000)
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to eq('[{"host":[8],"version":[8]}]')
end
end

context 'Compressed payload' do
it 'uses compressed value when JSON string is bigger than MIN_SCHEMA_SIZE_FOR_COMPRESSION' do
result = "H4sIAOYoHGUAA4aphwAAAA=\n"
stub_const('Datadog::AppSec::Event::MIN_SCHEMA_SIZE_FOR_COMPRESSION', 1)
expect(described_class).to receive(:compressed_and_base64_encoded).and_return(result)

meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to eq(result)
end
end

context 'derivative values exceed Event::MAX_ENCODED_SCHEMA_SIZE value' do
it 'do not add derivative key to meta' do
stub_const('Datadog::AppSec::Event::MAX_ENCODED_SCHEMA_SIZE', 1)
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to be_nil
end
end
end
end
Expand Down

0 comments on commit de7400c

Please sign in to comment.