Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement sorting using twitter_cldr #32

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lib/naturally.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def self.sort_by(an_array, an_attribute=nil, &block)
an_array.sort_by { |obj| normalize(obj.send(an_attribute)) }
end

# Sort the given array naturally, forwarding +collator+ to {normalize}
# for every element so it can take part in the comparison.
#
# @param an_array [Array] the elements to sort
# @param collator [Object] collator handed to {normalize} for each element
# @return [Array] a new array ordered by each element's normalized form
def self.sort_with_collator(an_array, collator)
  an_array.sort_by do |item|
    normalize(item, collator)
  end
end

# Convert the given number to an array of {Segment}s.
# This enables it to be sorted against other arrays
# by the built-in #sort method.
Expand All @@ -44,9 +48,9 @@ def self.sort_by(an_array, an_attribute=nil, &block)
# such as 1.2a.3.
# @return [Array<Segment>] an array of Segments which
# can be sorted naturally via a standard #sort.
# @param collator [#compare, nil] optional collator passed on to every
#   Segment that is built; nil when omitted.
def self.normalize(complex_number, collator = nil)
  # \p{Word} also matches "_", so underscores are removed before tokenizing.
  tokens = complex_number.to_s.delete('_').scan(/\p{Word}+/)
  tokens.map { |token| Segment.new(token, collator) }
end

private
Expand Down
64 changes: 62 additions & 2 deletions lib/naturally/segment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@ module Naturally
class Segment
include Comparable

# @param v [Object] the raw value this segment wraps
# @param collator [#compare, nil] optional collator used by #<=> when
#   present; nil when omitted
def initialize(v, collator = nil)
  @val = v
  @collator = collator
end

# Compare this segment with another via their array forms.
#
# Without a collator this is a plain Array#<=>; with one, the comparison
# is delegated to #compare_using_collator_for_strings.
#
# @param other [#to_array] the segment to compare against
# @return [Integer, nil] the result of the underlying comparison
def <=>(other)
  other_array = other.to_array
  return to_array <=> other_array unless @collator

  compare_using_collator_for_strings(to_array, other_array)
end

# @return [Array] a representation of myself in array form
Expand Down Expand Up @@ -40,5 +47,58 @@ def to_array
[:str, @val]
end
end

private

# Compare two arrays according to the rules of Ruby, using a collator to
# compare String elements.
# https://github.com/ruby/ruby/blob/v3_0_1/array.c#L5173-L5210
#
# call-seq:
# array <=> other_array -> -1, 0, or 1
#
# Returns -1, 0, or 1 as +self+ is less than, equal to, or greater than +other_array+.
# For each index +i+ in +self+, evaluates <tt>result = self[i] <=> other_array[i]</tt>.
#
# Returns -1 if any result is -1:
# [0, 1, 2] <=> [0, 1, 3] # => -1
#
# Returns 1 if any result is 1:
# [0, 1, 2] <=> [0, 1, 1] # => 1
#
# When all results are zero:
# - Returns -1 if +array+ is smaller than +other_array+:
# [0, 1, 2] <=> [0, 1, 2, 3] # => -1
# - Returns 1 if +array+ is larger than +other_array+:
# [0, 1, 2] <=> [0, 1] # => 1
# - Returns 0 if +array+ and +other_array+ are the same size:
# [0, 1, 2] <=> [0, 1, 2] # => 0
#
# @param array_1 [Array] left-hand array of Integer, Symbol and String elements
# @param array_2 [Array] right-hand array
# @return [Integer, nil] -1, 0 or 1; whatever the element comparison
#   returned (possibly nil) when a pair of elements does not compare as equal
# @raise [ArgumentError] if an element of +array_1+ is not an Integer,
#   Symbol or String
def compare_using_collator_for_strings(array_1, array_2)
  array_1.each_with_index do |element, index|
    # Past the end of the shorter array: only the lengths decide from here.
    break if index >= array_2.length

    other_element = array_2[index]
    cmp =
      case element
      when Integer, Symbol
        element <=> other_element
      when String
        @collator.compare(element, other_element)
      else
        raise ArgumentError, "Cannot compare #{element.class} with #{other_element.class}"
      end

    # First non-equal pair settles the result.
    return cmp unless cmp == 0
  end

  # All shared positions are equal, so the longer array sorts last.
  array_1.length <=> array_2.length
end
end
end
2 changes: 2 additions & 0 deletions naturally.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Gem::Specification.new do |gem|
gem.homepage = "http://github.com/dogweather/naturally"
gem.required_ruby_version = '>= 2.0'

gem.add_development_dependency 'twitter_cldr'

gem.files = `git ls-files`.split($/)
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
Expand Down
34 changes: 34 additions & 0 deletions spec/naturally_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,38 @@ def it_sorts(opts = {})
]
end
end

# Natural sorting driven by an externally supplied collator (German locale).
describe 'using a collator' do
  require 'twitter_cldr'

  let(:collator) { TwitterCldr::Collation::Collator.new(:de) }

  it 'sorts unicode characters correctly when using a collator' do
    input = %w[Öl10 b öl3 a Öl1 Öl2 A B]
    expected = %w[a A b B öl3 Öl1 Öl2 Öl10]

    expect(Naturally.sort_with_collator(input, collator)).to eq expected
  end

  # https://github.com/dogweather/naturally/issues/20#issuecomment-450617803
  it 'sorts neither like the Duden nor the telephone book for German' do
    unsorted_names = [
      'Müller, Franziska',
      'Muller, Inge',
      'Müller, Hansi',
      'Muller, Erika',
      'Mueller, Gerd'
    ]
    expected_names = [
      'Mueller, Gerd',
      'Muller, Erika',
      'Muller, Inge',
      'Müller, Franziska',
      'Müller, Hansi'
    ]

    sorted_names = Naturally.sort_with_collator(unsorted_names, collator)

    expect(sorted_names).to eq expected_names
  end
end
end