Skip to content

Commit 82a1ea5

Browse files
eightbitraptor authored and luke-gruber committed
Port over k-nucleotide from TCLBG
Contains 2 implementations: one using Process.fork, the other using Ractors.
1 parent 68e1f7d commit 82a1ea5

File tree

3 files changed

+157
-0
lines changed

3 files changed

+157
-0
lines changed

benchmarks.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,9 @@ graphql:
7171
ractor: true
7272
graphql-native:
7373
desc: GraphQL gem parsing a large file, but using a native parser
74+
knucleotide:
75+
desc: k-nucleotide from the Computer Language Benchmarks Game - counts nucleotide frequencies using hash tables
76+
ractor: true
7477
lee:
7578
desc: lee is a circuit-board layout solver, deployed in a plausibly reality-like way
7679
matmul:
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# The Computer Language Benchmarks Game
2+
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
3+
#
4+
# k-nucleotide benchmark - Fastest implementation
5+
# Based on Ruby #1 with byteslice optimization
6+
7+
require_relative '../../harness/loader'
8+
9+
# Tallies every overlapping window of +length+ bytes in +seq+.
#
# @param seq [String] sequence to scan
# @param length [Integer] window size, in bytes
# @return [Array(Integer, Hash{String=>Integer})] the number of windows
#   examined and a hash mapping each window to its count (missing keys
#   read as 0)
def frequency(seq, length)
  frequencies = Hash.new(0)
  # Measure in bytes so the loop bound agrees with byteslice below.
  last_index = seq.bytesize - length

  # Explicit index loop kept as-is; the file header describes this as the
  # byteslice-optimized variant, so the loop shape is left untouched.
  i = 0
  while i <= last_index
    frequencies[seq.byteslice(i, length)] += 1
    i += 1
  end

  # A window longer than the sequence produces zero windows, not a
  # negative count.
  [[last_index + 1, 0].max, frequencies]
end
21+
22+
# Formats the relative frequency of every +length+-byte fragment of +seq+.
#
# Fragments are ordered by descending count, ties broken by ascending
# fragment text. Each line is "<fragment> <percentage with 3 decimals>".
#
# @param seq [String] sequence to analyse
# @param length [Integer] fragment size passed through to +frequency+
# @return [String] one line per fragment, followed by a blank line
def sort_by_freq(seq, length)
  total, table = frequency(seq, length)

  # Negating the count gives descending order on the first key while the
  # fragment itself keeps ascending order on the second.
  lines = table
    .sort_by { |fragment, count| [-count, fragment] }
    .map { |fragment, count| format('%s %.3f', fragment, (count * 100.0) / total) }

  lines.join("\n") << "\n\n"
end
32+
33+
# Reports how many times the fragment +s+ occurs in +seq+ (overlapping
# occurrences included, as counted by +frequency+).
#
# @param seq [String] sequence to search
# @param s [String] fragment to look up
# @return [String] "<count>\t<fragment>\n"
def find_seq(seq, s)
  # Only the tally is needed here; the window total is discarded.
  _total, table = frequency(seq, s.length)
  "#{table.fetch(s, 0)}\t#{s}\n"
end
37+
38+
# Runs a block in a forked child process and hands its return value back
# to the parent through a pipe.
#
# The value is sent with IO#write, so the parent always receives it as a
# String.
class Worker
  # Forks immediately and starts evaluating the block in the child.
  #
  # @yieldreturn [#to_s] value to send back to the parent
  # @raise [ArgumentError] if no block is given
  def initialize(&block)
    # Fail in the parent rather than silently inside the child.
    raise ArgumentError, 'Worker requires a block' unless block

    @r, @w = IO.pipe
    @p = Process.fork do
      @r.close
      begin
        @w.write block.call
      ensure
        # Always release the write end so the parent's read can finish.
        @w.close
      end
    end
    # The parent only reads; closing its copy of the write end lets the
    # read in #result see EOF once the child is done.
    @w.close
  end

  # Blocks until the child has finished writing, reaps it, and returns
  # what it wrote. Safe to call more than once.
  #
  # @return [String] the child's output
  def result
    return @result if defined?(@result)

    # Drain the pipe before waiting on the child.
    @result = @r.read
    @r.close
    Process.wait(@p)
    @result
  end
end
56+
57+
# Builds a deterministic nucleotide string of exactly +size+ characters by
# repeating a fixed fragment and truncating the final copy.
#
# @param size [Integer] desired length of the result
# @return [String] a new, unfrozen string of length +size+
# @raise [ArgumentError] if +size+ is negative
def generate_test_sequence(size)
  # Floor-modulo on a negative size would otherwise yield a non-empty result.
  raise ArgumentError, "size must be non-negative (got #{size})" if size.negative?

  # The fragment is already upper-case A/C/G/T, so no case folding is needed.
  alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA" \
        "GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG" \
        "TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT" \
        "GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

  full_copies, remainder = size.divmod(alu.length)

  sequence = alu * full_copies
  sequence << alu[0, remainder]
  sequence
end
72+
73+
# Input shared by every iteration; frozen so the constant cannot be
# mutated by accident.
TEST_SEQUENCE = generate_test_sequence(100_000).freeze

run_benchmark(5) do
  freqs = [1, 2]
  nucleos = %w[GGT GGTA GGTATT GGTATTTTAATT GGTATTTTAATTTATAGT]

  # Parallel processing with Process.fork: every Worker is started before
  # any result is read, so the child processes can run concurrently.
  workers = freqs.map { |len| Worker.new { sort_by_freq(TEST_SEQUENCE, len) } } +
            nucleos.map { |fragment| Worker.new { find_seq(TEST_SEQUENCE, fragment) } }

  # Collect results in launch order; this array is the block's value.
  workers.map(&:result)
end

benchmarks/ractor/knucleotide.rb

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# The Computer Language Benchmarks Game
2+
# https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
3+
#
4+
# k-nucleotide benchmark - Ractor implementation
5+
# Mirrors the Process.fork version structure as closely as possible
6+
7+
require_relative "../../harness/loader"
8+
9+
# Tallies every overlapping window of +length+ bytes in +seq+.
#
# @param seq [String] sequence to scan
# @param length [Integer] window size, in bytes
# @return [Array(Integer, Hash{String=>Integer})] the number of windows
#   examined and a hash mapping each window to its count (missing keys
#   read as 0)
def frequency(seq, length)
  frequencies = Hash.new(0)
  # Measure in bytes so the loop bound agrees with byteslice below.
  last_index = seq.bytesize - length

  # Explicit index loop kept as-is so this file keeps mirroring the
  # Process.fork version, as its header comment states.
  i = 0
  while i <= last_index
    frequencies[seq.byteslice(i, length)] += 1
    i += 1
  end

  # A window longer than the sequence produces zero windows, not a
  # negative count.
  [[last_index + 1, 0].max, frequencies]
end
21+
22+
# Formats the relative frequency of every +length+-byte fragment of +seq+.
#
# Fragments are ordered by descending count, ties broken by ascending
# fragment text. Each line is "<fragment> <percentage with 3 decimals>".
#
# @param seq [String] sequence to analyse
# @param length [Integer] fragment size passed through to +frequency+
# @return [String] one line per fragment, followed by a blank line
def sort_by_freq(seq, length)
  total, table = frequency(seq, length)

  # Negating the count gives descending order on the first key while the
  # fragment itself keeps ascending order on the second.
  lines = table
    .sort_by { |fragment, count| [-count, fragment] }
    .map { |fragment, count| format('%s %.3f', fragment, (count * 100.0) / total) }

  lines.join("\n") << "\n\n"
end
32+
33+
# Reports how many times the fragment +s+ occurs in +seq+ (overlapping
# occurrences included, as counted by +frequency+).
#
# @param seq [String] sequence to search
# @param s [String] fragment to look up
# @return [String] "<count>\t<fragment>\n"
def find_seq(seq, s)
  # Only the tally is needed here; the window total is discarded.
  _total, table = frequency(seq, s.length)
  "#{table.fetch(s, 0)}\t#{s}\n"
end
37+
38+
# Builds a deterministic nucleotide string of exactly +size+ characters by
# repeating a fixed fragment and truncating the final copy.
#
# @param size [Integer] desired length of the result
# @return [String] a frozen string of length +size+
# @raise [ArgumentError] if +size+ is negative
def generate_test_sequence(size)
  # Floor-modulo on a negative size would otherwise yield a non-empty result.
  raise ArgumentError, "size must be non-negative (got #{size})" if size.negative?

  # The fragment is already upper-case A/C/G/T, so no case folding is needed.
  alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA" \
        "GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG" \
        "TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT" \
        "GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

  full_copies, remainder = size.divmod(alu.length)

  sequence = alu * full_copies
  sequence << alu[0, remainder]
  sequence.freeze
end
53+
54+
# Make sequence shareable for Ractors
TEST_SEQUENCE = Ractor.make_shareable(generate_test_sequence(100_000))

# The block parameters are not used by this body; they are kept (with an
# underscore prefix) so the block still accepts whatever run_benchmark
# passes to it.
run_benchmark(5) do |_num_ractors, _ractor_args|
  freqs = [1, 2]
  nucleos = %w[GGT GGTA GGTATT GGTATTTTAATT GGTATTTTAATTTATAGT]

  # Sequential version - mirrors Process version but without Workers.
  # The combined array (frequency tables first, then fragment counts) is
  # the block's value.
  freqs.map { |len| sort_by_freq(TEST_SEQUENCE, len) } +
    nucleos.map { |fragment| find_seq(TEST_SEQUENCE, fragment) }
end

0 commit comments

Comments
 (0)