Skip to content

Commit

Permalink
fix: remove module functions from JaroWinkler
Browse files Browse the repository at this point in the history
JaroWinkler is not designed to be used as a mixin, so replace
rb_define_module_function with rb_define_singleton_method.

BREAKING CHANGE: JaroWinkler can no longer be mixed in; call the methods on the
module itself instead, e.g. `JaroWinkler.distance`.
  • Loading branch information
tonytonyjan committed Sep 29, 2017
1 parent feca1a5 commit af249d5
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 91 deletions.
6 changes: 3 additions & 3 deletions ext/jaro_winkler/jaro_winkler.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ void Init_jaro_winkler_ext(void){
rb_mJaroWinkler = rb_define_module("JaroWinkler");
rb_eError = rb_define_class_under(rb_mJaroWinkler, "Error", rb_eRuntimeError);
rb_eInvalidWeightError = rb_define_class_under(rb_mJaroWinkler, "InvalidWeightError", rb_eError);
rb_define_module_function(rb_mJaroWinkler, "distance", rb_jaro_winkler_distance, -1);
rb_define_module_function(rb_mJaroWinkler, "jaro_distance", rb_jaro_distance, -1);
rb_define_singleton_method(rb_mJaroWinkler, "distance", rb_jaro_winkler_distance, -1);
rb_define_singleton_method(rb_mJaroWinkler, "jaro_distance", rb_jaro_distance, -1);
}


Expand Down Expand Up @@ -42,4 +42,4 @@ VALUE rb_jaro_distance(int argc, VALUE *argv, VALUE self){

/* Ruby-facing entry point taking the variadic (argc, argv, self) signature.
 * It does no work of its own: it forwards its arguments unchanged to the
 * shared distance() helper together with jaro_winkler_distance
 * (presumably the scoring routine distance() should apply -- confirm against
 * the definition of distance(), which is not visible here). */
VALUE rb_jaro_winkler_distance(int argc, VALUE *argv, VALUE self){
return distance(argc, argv, self, jaro_winkler_distance);
}
}
177 changes: 89 additions & 88 deletions lib/jaro_winkler/jaro_winkler_pure.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,116 +10,117 @@ class InvalidWeightError < Error; end
jaro_winkler: {weight: DEFAULT_WEIGHT, threshold: DEFAULT_THRESHOLD}
}

module_function

# Converts both strings to arrays of integer codepoints and delegates to
# +_distance+, passing +options+ through untouched.
#
# @param str1 [String] first string to compare
# @param str2 [String] second string to compare
# @param options [Hash] settings forwarded as-is to +_distance+
# @return whatever +_distance+ returns for the two codepoint arrays
def distance(str1, str2, options = {})
  first_codes = str1.codepoints.to_a
  second_codes = str2.codepoints.to_a
  _distance(first_codes, second_codes, options)
end

# Converts both strings to arrays of integer codepoints and delegates to
# +_jaro_distance+, passing +options+ through untouched.
#
# @param str1 [String] first string to compare
# @param str2 [String] second string to compare
# @param options [Hash] settings forwarded as-is to +_jaro_distance+
# @return whatever +_jaro_distance+ returns for the two codepoint arrays
def jaro_distance(str1, str2, options = {})
  first_codes = str1.codepoints.to_a
  second_codes = str2.codepoints.to_a
  _jaro_distance(first_codes, second_codes, options)
end
class << self
# Converts both strings to arrays of integer codepoints and delegates to
# +_distance+, passing +options+ through untouched.
#
# @param str1 [String] first string to compare
# @param str2 [String] second string to compare
# @param options [Hash] settings forwarded as-is to +_distance+
# @return whatever +_distance+ returns for the two codepoint arrays
def distance(str1, str2, options = {})
  first_codes = str1.codepoints.to_a
  second_codes = str2.codepoints.to_a
  _distance(first_codes, second_codes, options)
end

def _distance codes1, codes2, options={}
options = DEFAULT_OPTIONS[:jaro_winkler].merge options
raise InvalidWeightError if options[:weight] > 0.25
jaro_distance = _jaro_distance(codes1, codes2, options);
# Converts both strings to arrays of integer codepoints and delegates to
# +_jaro_distance+, passing +options+ through untouched.
#
# @param str1 [String] first string to compare
# @param str2 [String] second string to compare
# @param options [Hash] settings forwarded as-is to +_jaro_distance+
# @return whatever +_jaro_distance+ returns for the two codepoint arrays
def jaro_distance(str1, str2, options = {})
  first_codes = str1.codepoints.to_a
  second_codes = str2.codepoints.to_a
  _jaro_distance(first_codes, second_codes, options)
end

if jaro_distance < options[:threshold]
jaro_distance
else
codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
len1, len2 = codes1.length, codes2.length
max_4 = len1 > 4 ? 4 : len1
prefix = 0
while prefix < max_4 && codes1[prefix] == codes2[prefix]
prefix += 1
private

def _distance codes1, codes2, options={}
options = DEFAULT_OPTIONS[:jaro_winkler].merge options
raise InvalidWeightError if options[:weight] > 0.25
jaro_distance = _jaro_distance(codes1, codes2, options);

if jaro_distance < options[:threshold]
jaro_distance
else
codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
len1, len2 = codes1.length, codes2.length
max_4 = len1 > 4 ? 4 : len1
prefix = 0
while prefix < max_4 && codes1[prefix] == codes2[prefix]
prefix += 1
end
jaro_distance + prefix * options[:weight] * (1 - jaro_distance)
end
jaro_distance + prefix * options[:weight] * (1 - jaro_distance)
end
end

def _jaro_distance codes1, codes2, options={}
options = DEFAULT_OPTIONS[:jaro].merge options

codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
len1, len2 = codes1.length, codes2.length
return 0.0 if len1 == 0 || len2 == 0

if options[:ignore_case]
codes1.map!{ |c| c >= 97 && c <= 122 ? c -= 32 : c }
codes2.map!{ |c| c >= 97 && c <= 122 ? c -= 32 : c }
end
def _jaro_distance codes1, codes2, options={}
options = DEFAULT_OPTIONS[:jaro].merge options

window = len2/2 - 1
window = 0 if(window < 0)
flags1, flags2 = 0, 0
codes1, codes2 = codes2, codes1 if codes1.length > codes2.length
len1, len2 = codes1.length, codes2.length
return 0.0 if len1 == 0 || len2 == 0

# // count number of matching characters
match_count = 0;
i = 0
while i < len1
left = (i >= window) ? i - window : 0
right = (i + window <= len2 - 1) ? (i + window) : (len2 - 1)
right = len2 - 1 if right > len2 - 1
j = left
while j <= right
if flags2[j] == 0 && codes1[i] == codes2[j]
flags1 |= (1 << i)
flags2 |= (1 << j)
match_count += 1
break
end
j +=1
if options[:ignore_case]
codes1.map!{ |c| c >= 97 && c <= 122 ? c -= 32 : c }
codes2.map!{ |c| c >= 97 && c <= 122 ? c -= 32 : c }
end
i += 1
end

return 0.0 if match_count == 0
window = len2/2 - 1
window = 0 if(window < 0)
flags1, flags2 = 0, 0

# // count number of transpositions
transposition_count = j = k = 0
i = 0
while i < len1
if flags1[i] == 1
j = k
while j < len2
if flags2[j] == 1
k = j + 1;
break;
# // count number of matching characters
match_count = 0;
i = 0
while i < len1
left = (i >= window) ? i - window : 0
right = (i + window <= len2 - 1) ? (i + window) : (len2 - 1)
right = len2 - 1 if right > len2 - 1
j = left
while j <= right
if flags2[j] == 0 && codes1[i] == codes2[j]
flags1 |= (1 << i)
flags2 |= (1 << j)
match_count += 1
break
end
j += 1
j +=1
end
transposition_count += 1 if codes1[i] != codes2[j]
i += 1
end
i += 1
end

# // count similarities in nonmatched characters
similar_count = 0
if options[:adj_table] && len1 > match_count
return 0.0 if match_count == 0

# // count number of transpositions
transposition_count = j = k = 0
i = 0
while i < len1
if flags1[i] == 0
j = 0
if flags1[i] == 1
j = k
while j < len2
if flags2[j] == 0
if DEFAULT_ADJ_TABLE[codes1[i].chr(Encoding::UTF_8)][codes2[j].chr(Encoding::UTF_8)]
similar_count += 3
break
end
if flags2[j] == 1
k = j + 1;
break;
end
j += 1
end
transposition_count += 1 if codes1[i] != codes2[j]
end
i += 1
end
end

m = match_count.to_f
t = transposition_count/2
m = similar_count/10.0 + m if options[:adj_table]
(m/len1 + m/len2 + (m-t)/m) / 3
end
# // count similarities in nonmatched characters
similar_count = 0
if options[:adj_table] && len1 > match_count
i = 0
while i < len1
if flags1[i] == 0
j = 0
while j < len2
if flags2[j] == 0
if DEFAULT_ADJ_TABLE[codes1[i].chr(Encoding::UTF_8)][codes2[j].chr(Encoding::UTF_8)]
similar_count += 3
break
end
end
j += 1
end
end
i += 1
end
end

end
m = match_count.to_f
t = transposition_count/2
m = similar_count/10.0 + m if options[:adj_table]
(m/len1 + m/len2 + (m-t)/m) / 3
end
end
end

0 comments on commit af249d5

Please sign in to comment.