Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[APPSEC-10967] Compress and encode schema information #3177

Merged
merged 6 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ if RUBY_PLATFORM != 'java'
end

group :check do
if RUBY_VERSION >= '2.7.0' && RUBY_PLATFORM != 'java'
gem 'rbs', '~> 3.1.0', require: false
if RUBY_VERSION >= '3.0.0' && RUBY_PLATFORM != 'java'
gem 'rbs', '~> 3.2.0', require: false
gem 'steep', '~> 1.4.0', require: false
end
end
Expand Down
1 change: 1 addition & 0 deletions Steepfile
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ target :ddtrace do
library 'securerandom'
library 'base64'
library 'digest'
library 'zlib'

repo_path 'vendor/rbs'
library 'cucumber'
Expand Down
146 changes: 91 additions & 55 deletions lib/datadog/appsec/event.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
require 'json'
require 'zlib'
require 'base64'

require_relative 'rate_limiter'

Expand Down Expand Up @@ -34,86 +36,120 @@ module Event
Content-Language
].map!(&:downcase).freeze

MAX_ENCODED_SCHEMA_SIZE = 25000

# Record events for a trace
#
# This is expected to be called only once per trace for the rate limiter
# to properly apply
def self.record(span, *events)
# ensure rate limiter is called only when there are events to record
return if events.empty? || span.nil?

Datadog::AppSec::RateLimiter.limit(:traces) do
record_via_span(span, *events)
end
end
class << self
def record(span, *events)
# ensure rate limiter is called only when there are events to record
return if events.empty? || span.nil?

def self.record_via_span(span, *events)
events.group_by { |e| e[:trace] }.each do |trace, event_group|
unless trace
Datadog.logger.debug { "{ error: 'no trace: cannot record', event_group: #{event_group.inspect}}" }
next
Datadog::AppSec::RateLimiter.limit(:traces) do
record_via_span(span, *events)
end
end

trace.keep!
trace.set_tag(
Datadog::Tracing::Metadata::Ext::Distributed::TAG_DECISION_MAKER,
Datadog::Tracing::Sampling::Ext::Decision::ASM
)
def record_via_span(span, *events)
events.group_by { |e| e[:trace] }.each do |trace, event_group|
unless trace
Datadog.logger.debug { "{ error: 'no trace: cannot record', event_group: #{event_group.inspect}}" }
next
end

# prepare and gather tags to apply
service_entry_tags = build_service_entry_tags(event_group)
trace.keep!
trace.set_tag(
Datadog::Tracing::Metadata::Ext::Distributed::TAG_DECISION_MAKER,
Datadog::Tracing::Sampling::Ext::Decision::ASM
)

# complex types are unsupported, we need to serialize to a string
triggers = service_entry_tags.delete('_dd.appsec.triggers')
span.set_tag('_dd.appsec.json', JSON.dump({ triggers: triggers }))
# prepare and gather tags to apply
service_entry_tags = build_service_entry_tags(event_group)

# apply tags to service entry span
service_entry_tags.each do |key, value|
span.set_tag(key, value)
# apply tags to service entry span
service_entry_tags.each do |key, value|
span.set_tag(key, value)
end
end
end
end

def self.build_service_entry_tags(event_group)
event_group.each_with_object({}) do |event, tags|
# TODO: assume HTTP request context for now

if (request = event[:request])
request_headers = request.headers.select do |k, _|
ALLOWED_REQUEST_HEADERS.include?(k.downcase)
# rubocop: disable Metrics/MethodLength
def build_service_entry_tags(event_group)
waf_events = []
entry_tags = event_group.each_with_object({ '_dd.origin' => 'appsec' }) do |event, tags|
# TODO: assume HTTP request context for now
if (request = event[:request])
request.headers.each do |header, value|
tags["http.request.headers.#{header}"] = value if ALLOWED_REQUEST_HEADERS.include?(header.downcase)
end
Comment on lines +87 to +89
Copy link
Member Author

@GustavoCaso GustavoCaso Oct 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reduce the number of times we iterate over request.headers


tags['http.host'] = request.host
tags['http.useragent'] = request.user_agent
tags['network.client.ip'] = request.remote_addr
end

request_headers.each do |header, value|
tags["http.request.headers.#{header}"] = value
if (response = event[:response])
response.headers.each do |header, value|
tags["http.response.headers.#{header}"] = value if ALLOWED_RESPONSE_HEADERS.include?(header.downcase)
end
Comment on lines +97 to +99
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reduce the number of times we iterate over response.headers

end

tags['http.host'] = request.host
tags['http.useragent'] = request.user_agent
tags['network.client.ip'] = request.remote_addr
end
waf_result = event[:waf_result]
# accumulate triggers
waf_events += waf_result.events

if (response = event[:response])
response_headers = response.headers.select do |k, _|
ALLOWED_RESPONSE_HEADERS.include?(k.downcase)
end
waf_result.derivatives.each do |key, value|
parsed_value = json_parse(value)
next unless parsed_value

response_headers.each do |header, value|
tags["http.response.headers.#{header}"] = value
parsed_value_size = parsed_value.size

compressed_data = compressed_and_base64_encoded(parsed_value)
compressed_data_size = compressed_data.size

if compressed_data_size >= MAX_ENCODED_SCHEMA_SIZE && parsed_value_size >= MAX_ENCODED_SCHEMA_SIZE
Datadog.logger.debug do
"Schema key: #{key} exceeds the max size value. It will not be included as part of the span tags"
end
next
end

derivative_value = parsed_value_size > compressed_data_size ? compressed_data : parsed_value

tags[key] = derivative_value
end

tags
end

tags['_dd.origin'] = 'appsec'
appsec_events = json_parse({ triggers: waf_events })
entry_tags['_dd.appsec.json'] = appsec_events if appsec_events
Comment on lines +132 to +133
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move from record_via_span into build_service_entry_tags

entry_tags
end
# rubocop: enable Metrics/MethodLength

# accumulate triggers
waf_result = event[:waf_result]
tags['_dd.appsec.triggers'] ||= []
tags['_dd.appsec.triggers'] += waf_result.events
private

waf_result.derivatives.each do |key, value|
tags[key] = JSON.dump(value)
end
def compressed_and_base64_encoded(value)
Base64.encode64(gzip(value))
rescue TypeError
nil
end

def json_parse(value)
JSON.dump(value)
rescue ArgumentError
nil
end

tags
def gzip(value)
sio = StringIO.new
gz = Zlib::GzipWriter.new(sio, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
Copy link
Member

@marcotc marcotc Oct 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could I trouble to run very simple benchmarks in your machine with a sample value payload and different combinations of compression and strategy arguments? (measure performance and compressed size, so we can make a good trade-off decision.)

gz.write(value)
gz.close
sio.string
end
end
end
Expand Down
10 changes: 10 additions & 0 deletions sig/datadog/appsec/event.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@ module Datadog

ALLOWED_RESPONSE_HEADERS: untyped

MAX_ENCODED_SCHEMA_SIZE: Numeric

def self.record: (Datadog::Tracing::SpanOperation, *untyped events) -> (nil | untyped)

def self.record_via_span: (Datadog::Tracing::SpanOperation, *untyped events) -> untyped

def self.build_service_entry_tags: (Array[Hash[::Symbol, untyped]] event_group) -> Hash[::String, untyped]

private

def self.compressed_and_base64_encoded: (untyped value) -> untyped

def self.json_parse: (untyped value) -> untyped

def self.gzip: (untyped value) -> untyped
end
end
end
29 changes: 26 additions & 3 deletions spec/datadog/appsec/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,32 @@
}
end

it 'adds derivatives to the top level span meta' do
meta = top_level_span.meta
expect(meta['_dd.appsec.s.req.headers']).to eq JSON.dump([{ 'host' => [8], 'version' => [8] }])
context 'JSON payload' do
it 'uses JSON string as is smaller than the compressed value' do
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to eq('[{"host":[8],"version":[8]}]')
end
end

context 'Compressed payload' do
it 'uses compressed value when is smaller than JSON string' do
result = "H4sIAOYoHGUAA4aphwAAAA=\n"
expect(described_class).to receive(:compressed_and_base64_encoded).and_return(result)

meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to eq(result)
end
end

context 'derivative values exceed Event::MAX_ENCODED_SCHEMA_SIZE value' do
it 'do not add derivative key to meta' do
stub_const('Datadog::AppSec::Event::MAX_ENCODED_SCHEMA_SIZE', 1)
meta = top_level_span.meta

expect(meta['_dd.appsec.s.req.headers']).to be_nil
end
end
end
end
Expand Down
Loading