-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
246 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
module Bmg
  class Summarizer
    #
    # Bucketizer summarizer.
    #
    # Collects every value seen for the summarized attribute, sorts them and
    # splits them into at most `:size` buckets holding (roughly) the same
    # number of values. The summary is an Array of Range, one per bucket.
    #
    # Options (read through `options`, presumably provided by the Summarizer
    # base class, which is not defined in this file):
    #
    # - :size          maximum number of buckets, a positive Integer
    #                  (defaults to 10).
    # - :boundaries    when :touching, every bucket starts where the previous
    #                  one ends (end-exclusive ranges, except the last one).
    #                  Any other value keeps each bucket as the inclusive
    #                  range between the first and last value of its slice.
    # - :value_length  when set, values are converted with `to_s` and
    #                  truncated to that many characters before bucketing.
    # - :distinct      when truthy, duplicate values are removed before
    #                  bucketing.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.bucketize(:qty, :size => 2).summarize(...)
    #
    class Bucketize < Summarizer

      # Sets default options.
      def default_options
        { :size => 10 }
      end

      # Returns the initial memo: a pair of arrays. Only the first one is
      # used by the code in this class; it accumulates the values seen.
      def least
        [[], []]
      end

      # Appends `val` to the values accumulated in `memo` and returns `memo`.
      def _happens(memo, val)
        memo.first << val
        memo
      end

      # Finalizes computation, turning the accumulated values into an Array
      # of Range (empty when no value was seen).
      #
      # Raises ArgumentError when options[:size] is not a positive Integer.
      def finalize(memo)
        buckets = compute_buckets(memo.first, options[:size])
        buckets = touching_buckets(buckets) if options[:boundaries] == :touching
        buckets
      end

    private

      # Sorts `values` and splits them into at most `num_buckets` inclusive
      # ranges (first_value..last_value of each slice).
      #
      # Returns an empty array when there is nothing to bucketize. Raises
      # ArgumentError when `num_buckets` is not a positive Integer, instead
      # of failing later with an opaque FloatDomainError (size 0) or silently
      # returning no bucket at all (negative size).
      def compute_buckets(values, num_buckets = 10)
        unless num_buckets.is_a?(Integer) && num_buckets > 0
          raise ArgumentError, "Bucketize :size must be a positive Integer, got #{num_buckets.inspect}"
        end

        sorted = values.sort
        # Truncation happens after sorting; prefixes of sorted strings stay
        # in (non strict) order, so no second sort is needed.
        sorted = sorted.map { |v| v.to_s[0...options[:value_length]] } if options[:value_length]
        sorted = sorted.uniq if options[:distinct]
        return [] if sorted.empty?

        # Rounding up guarantees that `num_buckets` slices cover every value;
        # fewer buckets are produced when the values run out earlier.
        bucket_size = (sorted.length / num_buckets.to_f).ceil
        sorted.each_slice(bucket_size).map { |slice| slice.first..slice.last }
      end

      # Rewrites `buckets` so that consecutive ranges share their boundary:
      # each range starts at the end of the previous bucket and excludes its
      # own end, except the last range which stays end-inclusive.
      #
      # NOTE: the first range keeps the first bucket's own begin and end, so
      # it is an empty range (e.g. 10...10) when that bucket holds a single
      # distinct value.
      def touching_buckets(buckets)
        # Nothing to stitch together; without this guard an empty relation
        # crashed on `nil.begin`.
        return [] if buckets.empty?

        starts = [buckets.first.begin, *buckets[0...-1].map(&:end)]
        last_index = buckets.size - 1
        buckets.each_with_index.map do |bucket, index|
          Range.new(starts[index], bucket.end, index != last_index)
        end
      end

    end # class Bucketize

    # Factors a bucketize summarizer
    def self.bucketize(*args, &bl)
      Bucketize.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
require 'spec_helper' | ||
module Bmg
  class Summarizer
    describe Bucketize do

      # Context-wide options; nested contexts override this to choose the
      # boundaries mode.
      let(:options) { {} }

      # Builds a Bucketize on `attribute`, merging the per-call overrides on
      # top of the context-wide options.
      def bucketizer(attribute, overrides)
        Bucketize.new(attribute, options.merge(overrides))
      end

      context 'with distinct values' do
        let(:rel) do
          [
            { sid: 'S1', qty: 10 },
            { sid: 'S2', qty: 20 },
            { sid: 'S3', qty: 30 },
            { sid: 'S4', qty: 40 }
          ]
        end

        context 'with separate boundaries' do
          let(:options) { { boundaries: :separate } }

          it 'should work when used standalone' do
            expect(bucketizer(:qty, size: 1).summarize(rel)).to eql([10..40])
            expect(bucketizer(:qty, size: 2).summarize(rel)).to eql([10..20, 30..40])
            expect(bucketizer(:qty, size: 3).summarize(rel)).to eql([10..20, 30..40])
            expect(bucketizer(:qty, size: 4).summarize(rel)).to eql([10..10, 20..20, 30..30, 40..40])
          end
        end

        context 'with touching boundaries' do
          let(:options) { { boundaries: :touching } }

          it 'should work when used standalone' do
            expect(bucketizer(:qty, size: 1).summarize(rel)).to eql([10..40])
            expect(bucketizer(:qty, size: 2).summarize(rel)).to eql([10...20, 20..40])
            expect(bucketizer(:qty, size: 3).summarize(rel)).to eql([10...20, 20..40])
            expect(bucketizer(:qty, size: 4).summarize(rel)).to eql([10...10, 10...20, 20...30, 30..40])
          end
        end
      end

      context 'with non distinct values' do
        # Same relation as above, with the value 40 appearing twice.
        let(:rel) do
          [
            { sid: 'S1', qty: 10 },
            { sid: 'S2', qty: 20 },
            { sid: 'S3', qty: 30 },
            { sid: 'S4', qty: 40 },
            { sid: 'S5', qty: 40 }
          ]
        end

        context 'with separate boundaries' do
          let(:options) { { boundaries: :separate } }

          it 'should work when used standalone' do
            expect(bucketizer(:qty, size: 1).summarize(rel)).to eql([10..40])
            expect(bucketizer(:qty, size: 2).summarize(rel)).to eql([10..30, 40..40])
            expect(bucketizer(:qty, size: 3).summarize(rel)).to eql([10..20, 30..40, 40..40])
            expect(bucketizer(:qty, size: 4).summarize(rel)).to eql([10..20, 30..40, 40..40])
          end

          it 'supports distinct' do
            with_duplicates = bucketizer(:qty, size: 2).summarize(rel)
            expect(with_duplicates).to eql([10..30, 40..40])

            without_duplicates = bucketizer(:qty, size: 2, distinct: true).summarize(rel)
            expect(without_duplicates).to eql([10..20, 30..40])
          end
        end

        context 'with touching boundaries' do
          let(:options) { { boundaries: :touching } }

          it 'should work when used standalone' do
            expect(bucketizer(:qty, size: 1).summarize(rel)).to eql([10..40])
            expect(bucketizer(:qty, size: 2).summarize(rel)).to eql([10...30, 30..40])
            expect(bucketizer(:qty, size: 3).summarize(rel)).to eql([10...20, 20...40, 40..40])
            expect(bucketizer(:qty, size: 4).summarize(rel)).to eql([10...20, 20...40, 40..40])
          end

          it 'supports distinct' do
            with_duplicates = bucketizer(:qty, size: 2).summarize(rel)
            expect(with_duplicates).to eql([10...30, 30..40])

            without_duplicates = bucketizer(:qty, size: 2, distinct: true).summarize(rel)
            expect(without_duplicates).to eql([10...20, 20..40])
          end
        end
      end

      context 'with string values and touching boundaries' do
        # Fifteen city names, so that five buckets hold three names each.
        let(:rel) do
          cities = [
            "Denver", "Austin", "Chicago", "Boston", "Dallas",
            "Atlanta", "Detroit", "Houston", "San Francisco", "Los Angeles",
            "New York", "Seattle", "Miami", "Phoenix", "Las Vegas"
          ]
          cities.map { |city| { city: city } }
        end

        it 'should work as expected with touching' do
          expected = [
            "Atlanta"..."Boston",
            "Boston"..."Denver",
            "Denver"..."Las Vegas",
            "Las Vegas"..."New York",
            "New York".."Seattle"
          ]
          expect(bucketizer(:city, size: 5, boundaries: :touching).summarize(rel)).to eql(expected)
        end

        it 'should work as expected with touching and value_length' do
          expected = ["Atl"..."Bos", "Bos"..."Den", "Den"..."Las", "Las"..."New", "New".."Sea"]
          summary = bucketizer(:city, size: 5, boundaries: :touching, value_length: 3).summarize(rel)
          expect(summary).to eql(expected)
        end

        it 'should work as expected with separate' do
          expected = [
            "Atlanta".."Boston",
            "Chicago".."Denver",
            "Detroit".."Las Vegas",
            "Los Angeles".."New York",
            "Phoenix".."Seattle"
          ]
          expect(bucketizer(:city, size: 5, boundaries: :separate).summarize(rel)).to eql(expected)
        end

        it 'should work as expected with separate and value_length' do
          expected = ["Atl".."Bos", "Chi".."Den", "Det".."Las", "Los".."New", "Pho".."Sea"]
          summary = bucketizer(:city, size: 5, boundaries: :separate, value_length: 3).summarize(rel)
          expect(summary).to eql(expected)
        end
      end

    end
  end
end