Skip to content

Commit

Permalink
Merge pull request #1028 from danmayer/add_benchmark_and_profile
Browse files Browse the repository at this point in the history
add a benchmark and profile script and hook into CI
  • Loading branch information
danmayer authored Feb 12, 2025
2 parents c626a9b + f51fbcc commit dba1337
Show file tree
Hide file tree
Showing 4 changed files with 518 additions and 0 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Runs bin/benchmark against locally started memcached daemons on every push
# and pull request.
name: Benchmarks

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      # Two daemons are started: the default port (11211) and 11222, which the
      # multi-server client in bin/benchmark connects to.
      - name: Install Memcached 1.6.23
        working-directory: scripts
        env:
          MEMCACHED_VERSION: 1.6.23
        run: |
          chmod +x ./install_memcached.sh
          ./install_memcached.sh
          memcached -d
          memcached -d -p 11222
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          # Quoted so YAML keeps the version a string instead of parsing it as
          # a float (an unquoted 3.0 would be read as 3).
          ruby-version: '3.2'
          bundler-cache: true # 'bundle install' and cache
      - name: Run Benchmarks
        run: RUBY_YJIT_ENABLE=1 BENCH_TARGET=all bundle exec bin/benchmark
38 changes: 38 additions & 0 deletions .github/workflows/profile.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Runs bin/profile against a locally started memcached daemon on every push and
# pull request, then uploads the generated profile files as a build artifact.
name: Profiles

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Install Memcached 1.6.23
        working-directory: scripts
        env:
          MEMCACHED_VERSION: 1.6.23
        run: |
          chmod +x ./install_memcached.sh
          ./install_memcached.sh
          memcached -d
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          # Quoted so YAML keeps the version a string instead of parsing it as
          # a float (an unquoted 3.0 would be read as 3).
          ruby-version: '3.4'
          bundler-cache: true # 'bundle install' and cache
      - name: Run Profiles
        run: RUBY_YJIT_ENABLE=1 BENCH_TARGET=all bundle exec bin/profile
      # NOTE(review): these names presumably match the files bin/profile
      # writes into the workspace root - confirm against that script.
      - name: Upload profile results
        uses: actions/upload-artifact@v4
        with:
          name: profile-results
          path: |
            client_get_profile.json
            socket_get_profile.json
            client_set_profile.json
            socket_set_profile.json
            client_get_multi_profile.json
            socket_get_multi_profile.json
            client_set_multi_profile.json
            socket_set_multi_profile.json
255 changes: 255 additions & 0 deletions bin/benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# This helps benchmark current performance of Dalli
# as well as compare performance of optimized and non-optimized calls like multi-set vs set
#
# run with:
# bundle exec bin/benchmark
# RUBY_YJIT_ENABLE=1 BENCH_TARGET=get bundle exec bin/benchmark
require 'bundler/inline'
require 'json'

gemfile do
source 'https://rubygems.org'
gem 'benchmark-ips'
gem 'logger'
end

require_relative '../lib/dalli'
require 'benchmark/ips'
require 'monitor'

##
# StringSerializer is a pass-through serializer: values go to and from the
# cache untouched, which avoids the overhead of Marshal or JSON.
##
class StringSerializer
  class << self
    # Returns +value+ unchanged; called when a value is written.
    def dump(value)
      value
    end

    # Returns +value+ unchanged; called when a value is read back.
    def load(value)
      value
    end
  end
end

# Benchmark configuration, overridable through environment variables:
#   BENCH_CACHE_URL    - server string handed to Dalli::Client (default 127.0.0.1:11211)
#   BENCH_TARGET       - which suite to run: set, get, get_multi, set_multi or all (default set)
#   BENCH_TIME         - measurement time passed to benchmark-ips (default 10)
#   BENCH_WARMUP       - warmup time passed to benchmark-ips (default 3)
#   BENCH_PAYLOAD_SIZE - size in bytes of the value being stored (default 700_000)
dalli_url = ENV.fetch('BENCH_CACHE_URL', '127.0.0.1:11211')
bench_target = ENV.fetch('BENCH_TARGET', 'set')
bench_time = ENV.fetch('BENCH_TIME', 10).to_i
bench_warmup = ENV.fetch('BENCH_WARMUP', 3).to_i
bench_payload_size = ENV.fetch('BENCH_PAYLOAD_SIZE', 700_000).to_i
payload = 'B' * bench_payload_size
# Line terminator used by the memcached text/meta protocol.
TERMINATOR = "\r\n"
# RubyVM::YJIT is not defined on every Ruby (e.g. builds without YJIT support),
# so guard the lookup instead of letting the script die with a NameError.
yjit_enabled = defined?(RubyVM::YJIT) ? RubyVM::YJIT.enabled? : false
puts "yjit: #{yjit_enabled}"

# Dalli clients under test. Both use the pass-through StringSerializer with compression disabled and raw: true.
client = Dalli::Client.new(dalli_url, serializer: StringSerializer, compress: false, raw: true)
# NOTE: the two-server client does not use BENCH_CACHE_URL; it expects memcached on localhost ports 11211 and 11222.
multi_client = Dalli::Client.new('localhost:11211,localhost:11222', serializer: StringSerializer, compress: false,
raw: true)

# The raw socket implementation is used to benchmark the performance of dalli & the overhead of the various abstractions
# in the library.
# NOTE: the socket address is hard-coded and does not follow BENCH_CACHE_URL.
sock = TCPSocket.new('127.0.0.1', '11211', connect_timeout: 1)
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, true)
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, true)
# Benchmarks didn't see any performance gains from increasing the SO_RCVBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, ::Socket::SO_RCVBUF, 1024 * 1024 * 8)
# Benchmarks did see an improvement in performance when increasing the SO_SNDBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 1024 * 1024 * 8)

# ensure the clients are all connected and working
client.set('key', payload)
multi_client.set('multi_key', payload)
# Classic text-protocol set on the raw socket: "set <key> <flags> <exptime> <bytes>", then the data and a terminator.
sock.write("set sock_key 0 3600 #{payload.bytesize}\r\n")
sock.write(payload)
sock.write(TERMINATOR)
sock.flush
sock.readline # clear the buffer

# Read each value back and abort early if any path returns something other than what was written.
raise 'dalli client mismatch' if payload != client.get('key')

raise 'multi dalli client mismatch' if payload != multi_client.get('multi_key')

# Meta get with the value flag: one header line, then exactly payload.bytesize bytes, then the terminator.
sock.write("mg sock_key v\r\n")
sock.readline
sock_value = sock.read(payload.bytesize)
sock.read(TERMINATOR.bytesize)
raise 'sock mismatch' if payload != sock_value

# ensure we have basic data for the benchmarks and get calls
# 100 keys ("multi_0".."multi_99"), each holding a payload one tenth of the configured size.
payload_smaller = 'B' * (bench_payload_size / 10)
pairs = {}
100.times do |i|
pairs["multi_#{i}"] = payload_smaller
end
# NOTE(review): client.quiet presumably sends the sets without waiting on each reply - confirm against Dalli docs.
client.quiet do
pairs.each do |key, value|
client.set(key, value, 3600, raw: true)
end
end

###
# GC Suite
# Hooks for benchmark-ips: before each warmup and each measured run a full
# collection is forced and the collector is then switched off, so GC pauses
# do not skew the timings.
###
class GCSuite
  # Called before the warmup phase of a report.
  def warming(*_args)
    collect_then_pause_gc
  end

  # Called before the measured phase of a report.
  def running(*_args)
    collect_then_pause_gc
  end

  # Part of the suite interface; intentionally does nothing.
  def warmup_stats(*_args)
    nil
  end

  # Part of the suite interface; intentionally does nothing.
  def add_report(*_args)
    nil
  end

  private

  # Runs one full collection, then leaves the collector disabled.
  def collect_then_pause_gc
    GC.enable
    GC.start
    GC.disable
  end
end
suite = GCSuite.new

# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
# Fetches every key of +pairs+ over a raw memcached socket with pipelined
# meta-get ("mg") commands and returns the values that were found.
#
# Each request asks for the value (v), the client flags (f) and the key (k).
# Every request except the last carries the quiet flag (q), so misses are
# silent; the last request always produces a reply and therefore marks the
# end of the pipeline.
#
# @param sock  [#write, #flush, #readline, #read] connected memcached socket
# @param pairs [Hash] hash whose keys are fetched (its values are not used)
# @return [Hash{String => String}] found keys mapped to their raw values;
#   keys that missed are absent
def sock_get_multi(sock, pairs)
  # Nothing would be written, so there is no reply to wait for.
  return {} if pairs.empty?

  keys = pairs.keys
  final_index = keys.size - 1
  keys.each_with_index do |key, index|
    tail = index == final_index ? '' : 'q'
    sock.write("mg #{key} v f k #{tail}\r\n")
  end
  sock.flush

  # read all the memcached responses back and build a hash of key value pairs
  last_key = keys.last.to_s
  results = {}
  loop do
    # chomp (not chomp!): chomp! returns nil when there is nothing to strip.
    line = sock.readline.chomp(TERMINATOR)
    # Only the final, non-quiet request can answer with a miss ("EN").
    break if line.empty? || line == 'EN' || line.start_with?('EN ')
    next unless line.start_with?('VA ')

    # Header looks like "VA <size> f<flags> k<key>".
    _, value_length, _flags, key_token = line.split
    key = key_token[1..]
    results[key] = sock.read(value_length.to_i)
    sock.read(TERMINATOR.bytesize)
    # Replies arrive in request order, so the last key's value ends the batch
    # even when earlier keys missed silently.
    break if key == last_key
  end
  results
end
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/PerceivedComplexity
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity

# Benchmark: storing one payload through Dalli versus a hand-written meta-set ("ms") on the raw socket.
# Runs when BENCH_TARGET is 'all' or 'set'.
if %w[all set].include?(bench_target)
Benchmark.ips do |x|
x.config(warmup: bench_warmup, time: bench_time, suite: suite)
x.report('client set') { client.set('key', payload) }
# x.report('multi client set') { multi_client.set('string_key', payload) }
x.report('raw sock set') do
# Header carries the key, the payload size, a 3600 second TTL (T3600) and set mode (MS); the data follows.
sock.write("ms sock_key #{payload.bytesize} T3600 MS\r\n")
sock.write(payload)
sock.write("\r\n")
sock.flush
sock.readline # clear the buffer
end
x.compare!
end
end

# Monitor used by the 'get sock non-blocking' report to serialize access to the shared socket.
@lock = Monitor.new
# Benchmark: reading one payload through Dalli, through blocking raw-socket reads, and through a
# lock-protected read_nonblock loop. Runs when BENCH_TARGET is 'all' or 'get'.
if %w[all get].include?(bench_target)
Benchmark.ips do |x|
x.config(warmup: bench_warmup, time: bench_time, suite: suite)
x.report('get dalli') do
result = client.get('key')
raise 'mismatch' unless result == payload
end
# NOTE: while this is the fastest it is not thread safe and is blocking vs IO sharing friendly
x.report('get sock') do
sock.write("mg sock_key v\r\n")
# Skip the response header line, then read exactly the payload followed by the terminator.
sock.readline
result = sock.read(payload.bytesize)
sock.read(TERMINATOR.bytesize)
raise 'mismatch' unless result == payload
end
# NOTE: This shows that when adding thread safety & non-blocking IO we are slower for single process/thread use case
x.report('get sock non-blocking') do
@lock.synchronize do
sock.write("mg sock_key v\r\n")
sock.readline
count = payload.bytesize
value = String.new(capacity: count + 1)
# Keep appending whatever is available until the whole value has arrived.
loop do
begin
value << sock.read_nonblock(count - value.bytesize)
rescue Errno::EAGAIN
# No data available yet: wait for the socket to become readable, then try the read again.
sock.wait_readable
retry
rescue EOFError
# Connection closed mid-value: stop reading; the mismatch check below then fails the run.
puts 'EOFError'
break
end
break if value.bytesize == count
end
sock.read(TERMINATOR.bytesize)
raise 'mismatch' unless value == payload
end
end
x.compare!
end
end

# Benchmark: fetching the 100 seeded keys with Dalli's get_multi versus the pipelined raw-socket
# sock_get_multi helper. Runs when BENCH_TARGET is 'all' or 'get_multi'.
if %w[all get_multi].include?(bench_target)
Benchmark.ips do |x|
x.config(warmup: bench_warmup, time: bench_time, suite: suite)
x.report('get 100 keys') do
result = client.get_multi(pairs.keys)
# Both reports verify the full result so a partial or failed fetch cannot pass as a fast run.
raise 'mismatch' unless result == pairs
end
x.report('get 100 keys raw sock') do
result = sock_get_multi(sock, pairs)
raise 'mismatch' unless result == pairs
end
x.compare!
end
end

# Benchmark: writing the 100 seeded keys through Dalli (inside a quiet block) versus pipelined meta-set
# commands on the raw socket. Runs when BENCH_TARGET is 'all' or 'set_multi'.
if %w[all set_multi].include?(bench_target)
Benchmark.ips do |x|
x.config(warmup: bench_warmup, time: bench_time, suite: suite)
x.report('write 100 keys simple') do
client.quiet do
pairs.each do |key, value|
client.set(key, value, 3600, raw: true)
end
end
end
# TODO: uncomment this once we add PR adding set_multi
# x.report('multi client set_multi 100') do
# multi_client.set_multi(pairs, 3600, raw: true)
# end
x.report('write 100 keys rawsock') do
count = pairs.length
tail = ''
# Every seeded value is payload_smaller, so one size is used for all keys.
value_bytesize = payload_smaller.bytesize
ttl = 3600

pairs.each do |key, value|
count -= 1
# All requests but the last carry 'q' (quiet), so only the final one is answered.
tail = count.zero? ? '' : 'q'
# Header, value and terminator are built into one pre-sized string and written in a single call.
sock.write(String.new("ms #{key} #{value_bytesize} c F0 T#{ttl} MS #{tail}\r\n",
capacity: key.size + value_bytesize + 40) << value << TERMINATOR)
end
sock.flush
sock.gets(TERMINATOR) # clear the buffer
end
# x.report('write_multi 100 keys') { client.set_multi(pairs, 3600, raw: true) }
x.compare!
end
end
Loading

0 comments on commit dba1337

Please sign in to comment.