From 51f25ea5957cfc2c13eae65cfa884a39681c1852 Mon Sep 17 00:00:00 2001 From: Anton Rieder Date: Tue, 15 Jun 2021 11:48:21 +0200 Subject: [PATCH] Implement sorting using `twitter_cldr` --- lib/naturally.rb | 8 +++-- lib/naturally/segment.rb | 64 ++++++++++++++++++++++++++++++++++++++-- naturally.gemspec | 2 ++ spec/naturally_spec.rb | 34 +++++++++++++++++++++ 4 files changed, 104 insertions(+), 4 deletions(-) diff --git a/lib/naturally.rb b/lib/naturally.rb index 7e09227..f328da8 100644 --- a/lib/naturally.rb +++ b/lib/naturally.rb @@ -33,6 +33,10 @@ def self.sort_by(an_array, an_attribute=nil, &block) an_array.sort_by { |obj| normalize(obj.send(an_attribute)) } end + def self.sort_with_collator(an_array, collator) + an_array.sort_by { |x| normalize(x, collator) } + end + # Convert the given number to an array of {Segment}s. # This enables it to be sorted against other arrays # by the built-in #sort method. @@ -44,9 +48,9 @@ def self.sort_by(an_array, an_attribute=nil, &block) # such as 1.2a.3. # @return [Array] an array of Segments which # can be sorted naturally via a standard #sort. 
- def self.normalize(complex_number) + def self.normalize(complex_number, collator = nil) tokens = complex_number.to_s.gsub(/\_/,'').scan(/\p{Word}+/) - tokens.map { |t| Segment.new(t) } + tokens.map { |t| Segment.new(t, collator) } end private diff --git a/lib/naturally/segment.rb b/lib/naturally/segment.rb index 5b7bf29..31159bc 100644 --- a/lib/naturally/segment.rb +++ b/lib/naturally/segment.rb @@ -6,12 +6,19 @@ module Naturally class Segment include Comparable - def initialize(v) + def initialize(v, collator = nil) @val = v + @collator = collator end def <=>(other) - to_array <=> other.to_array + other_array = other.to_array + + if @collator + compare_using_collator_for_strings(to_array, other_array) + else + to_array <=> other_array + end end # @return [Array] a representation of myself in array form @@ -40,5 +47,58 @@ def to_array [:str, @val] end end + + private + + # Compare two arrays according to the rules of Ruby, using a collator to + # compare String elements. + # https://github.com/ruby/ruby/blob/v3_0_1/array.c#L5173-L5210 + # + # call-seq: + # array <=> other_array -> -1, 0, or 1 + # + # Returns -1, 0, or 1 as +self+ is less than, equal to, or greater than +other_array+. + # For each index +i+ in +self+, evaluates result = self[i] <=> other_array[i]. 
+ # + # Returns -1 if any result is -1: + # [0, 1, 2] <=> [0, 1, 3] # => -1 + # + # Returns 1 if any result is 1: + # [0, 1, 2] <=> [0, 1, 1] # => 1 + # + # When all results are zero: + # - Returns -1 if +array+ is smaller than +other_array+: + # [0, 1, 2] <=> [0, 1, 2, 3] # => -1 + # - Returns 1 if +array+ is larger than +other_array+: + # [0, 1, 2] <=> [0, 1] # => 1 + # - Returns 0 if +array+ and +other_array+ are the same size: + # [0, 1, 2] <=> [0, 1, 2] # => 0 + # + def compare_using_collator_for_strings(array_1, array_2) + cmp = 0 + + array_1.each_with_index do |element, index| + next unless index < array_2.length + + case element + when Integer, Symbol + cmp = element <=> array_2[index] + when String + cmp = @collator.compare(element, array_2[index]) + else + raise ArgumentError, "Cannot compare #{element.class} with #{array_2[index].class}" + end + + break if cmp != 0 + end + + return cmp unless cmp == 0 + + length_difference = array_1.length - array_2.length + + return 0 if length_difference == 0 + return 1 if length_difference > 0 + return -1 + end end end diff --git a/naturally.gemspec b/naturally.gemspec index 3d8d027..cfcf3a8 100644 --- a/naturally.gemspec +++ b/naturally.gemspec @@ -13,6 +13,8 @@ Gem::Specification.new do |gem| gem.homepage = "http://github.com/dogweather/naturally" gem.required_ruby_version = '>= 2.0' + gem.add_development_dependency 'twitter_cldr' + gem.files = `git ls-files`.split($/) gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) diff --git a/spec/naturally_spec.rb b/spec/naturally_spec.rb index e5c88a0..f7e055e 100644 --- a/spec/naturally_spec.rb +++ b/spec/naturally_spec.rb @@ -208,4 +208,38 @@ def it_sorts(opts = {}) ] end end + + describe 'using a collator' do + require 'twitter_cldr' + + let(:collator) { TwitterCldr::Collation::Collator.new(:de) } + + it 'sorts unicode characters correctly when using a collator' do + unicode_strings = %w( Öl10 b 
öl3 a Öl1 Öl2 A B ) + actual = Naturally.sort_with_collator(unicode_strings, collator) + + expect(actual).to eq %w( a A b B öl3 Öl1 Öl2 Öl10 ) + end + + # https://github.com/dogweather/naturally/issues/20#issuecomment-450617803 + it 'sorts neither like the Duden nor the telephone book for German' do + names = [ + 'Müller, Franziska', + 'Muller, Inge', + 'Müller, Hansi', + 'Muller, Erika', + 'Mueller, Gerd' + ] + + actual = Naturally.sort_with_collator(names, collator) + + expect(actual).to eq [ + 'Mueller, Gerd', + 'Muller, Erika', + 'Muller, Inge', + 'Müller, Franziska', + 'Müller, Hansi' + ] + end + end end