Skip to content

Commit

Permalink
Fixes #1
Browse files Browse the repository at this point in the history
  • Loading branch information
xrash committed Jun 21, 2014
1 parent b62237a commit e698fd9
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
16 changes: 13 additions & 3 deletions jaro.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@ import (
)

func Jaro(a, b string) float64 {
matchRange := int(math.Floor(math.Max(float64(len(a)), float64(len(b))) / float64(2))) - 1
la := float64(len(a))
lb := float64(len(b))

// match range = max(len(a), len(b)) / 2 - 1
matchRange := int(math.Floor(math.Max(la, lb) / 2.0)) - 1
matchRange = int(math.Max(0, float64(matchRange - 1)))
var matches, halfs float64
transposed := make([]bool, len(b))

for i := 0; i < len(a); i++ {
start := int(math.Max(0, float64(i-matchRange)))
end := int(math.Min(float64(len(b)-1), float64(i+matchRange)))
end := int(math.Min(lb - 1, float64(i+matchRange)))

for j := start; j <= end; j++ {
if transposed[j] {
Expand All @@ -30,5 +34,11 @@ func Jaro(a, b string) float64 {
}
}

return ((matches/float64(len(a))) + (matches/float64(len(b))) + ((matches-math.Floor(float64(halfs/2))))/matches) / float64(3)
if matches == 0 {
return 0
}

transposes := math.Floor(float64(halfs/2))

return ((matches/la) + (matches/lb) + (matches-transposes)/matches) / 3.0
}
3 changes: 3 additions & 0 deletions tests/jaro-winkler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ func TestJaroWinkler(t *testing.T) {
{"MARTHA", "MARHTA", 0.9611111111111111},
{"JONES", "JOHNSON", 0.8323809523809523},
{"ABCVWXYZ", "CABVWXYZ", 0.9625},
{"A", "B", 0},
{"ABCDEF", "123456", 0},
{"AAAAAAAAABCCCC", "AAAAAAAAABCCCC", 1},
}

for _, c := range cases {
Expand Down
3 changes: 3 additions & 0 deletions tests/jaro_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ func TestJaro(t *testing.T) {
{"MARTHA", "MARHTA", 0.9444444444444445},
{"JONES", "JOHNSON", 0.7904761904761904},
{"ABCVWXYZ", "CABVWXYZ", 0.9583333333333334},
{"A", "B", 0},
{"ABCDEF", "123456", 0},
{"AAAAAAAAABCCCC", "AAAAAAAAABCCCC", 1},
}

for _, c := range cases {
Expand Down

1 comment on commit e698fd9

@ydnar
Copy link

@ydnar ydnar commented on e698fd9 Jun 21, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, fast! Thanks!

Please sign in to comment.