Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
agnivade committed May 21, 2021
1 parent c1c2800 commit 51ec67d
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
20 changes: 16 additions & 4 deletions levenshtein.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ package levenshtein

import "unicode/utf8"

// minLengthThreshold is the largest row length that ComputeDistance serves
// from a constant-size buffer. The row holds len(s1)+1 entries, so the
// threshold is compared against len(s1)+1 rather than the string length
// itself: rows longer than minLengthThreshold are created with make at
// their exact size (an allocation), while shorter inputs re-slice a buffer
// of constant length and are intended to be zero alloc.
const minLengthThreshold = 32

// ComputeDistance computes the levenshtein distance between the two
// strings passed as an argument. The return value is the levenshtein distance
//
Expand Down Expand Up @@ -39,12 +44,19 @@ func ComputeDistance(a, b string) int {
lenS1 := len(s1)
lenS2 := len(s2)

// init the row
x := make([]uint16, 0, 32)
if cap(x) < lenS1+1 {
// Init the row.
var x []uint16
if lenS1+1 > minLengthThreshold {
x = make([]uint16, lenS1+1)
} else {
// We make a small optimization here for small strings.
// Because a slice of constant length is effectively an array,
// it does not allocate. So we can re-slice it to the right length
// as long as it is below a desired threshold.
x = make([]uint16, minLengthThreshold)
x = x[:lenS1+1]
}
x = x[:lenS1+1]

// we start from 1 because index 0 is already 0.
for i := 1; i < len(x); i++ {
x[i] = uint16(i)
Expand Down
2 changes: 2 additions & 0 deletions levenshtein_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ func TestSanity(t *testing.T) {
{"distance", "difference", 5},
{"levenshtein", "frankenstein", 6},
{"resume and cafe", "resumes and cafes", 2},
{"a very long string that is meant to exceed", "another very long string that is meant to exceed", 6},
}
for i, d := range tests {
n := agnivade.ComputeDistance(d.a, d.b)
Expand Down Expand Up @@ -69,6 +70,7 @@ func BenchmarkSimple(b *testing.B) {
// Testing acutes and umlauts
{"resumé and café", "resumés and cafés", "French"},
{"Hafþór Júlíus Björnsson", "Hafþor Julius Bjornsson", "Nordic"},
{"a very long string that is meant to exceed", "another very long string that is meant to exceed", "long string"},
// Only 2 characters are less in the 2nd string
{"།་གམ་འས་པ་་མ།", "།་གམའས་པ་་མ", "Tibetan"},
}
Expand Down

0 comments on commit 51ec67d

Please sign in to comment.