diff --git a/go.mod b/go.mod index 3b23e2b6..b59b9e47 100644 --- a/go.mod +++ b/go.mod @@ -9,11 +9,10 @@ require ( github.com/obalunenko/logger v0.2.0 github.com/obalunenko/version v1.1.0 github.com/stretchr/testify v1.7.1 - github.com/urfave/cli/v2 v2.8.0 + github.com/urfave/cli/v2 v2.8.1 ) require ( - github.com/antzucaro/matchr v0.0.0-20210222213004-b04723ef80f0 // indirect github.com/certifi/gocertifi v0.0.0-20210507211836-431795d63e8d // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect @@ -27,6 +26,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect + github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 // indirect gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect ) diff --git a/go.sum b/go.sum index d33729ff..32faec3a 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,3 @@ -github.com/antzucaro/matchr v0.0.0-20210222213004-b04723ef80f0 h1:R/qAiUxFT3mNgQaNqJe0IVznjKRNm23ohAIh9lgtlzc= -github.com/antzucaro/matchr v0.0.0-20210222213004-b04723ef80f0/go.mod h1:v3ZDlfVAL1OrkKHbGSFFK60k0/7hruHPDq2XMs9Gu6U= github.com/briandowns/spinner v1.18.1 h1:yhQmQtM1zsqFsouh09Bk/jCjd50pC3EOGsh28gLVvwY= github.com/briandowns/spinner v1.18.1/go.mod h1:mQak9GHqbspjC/5iUx3qMlIho8xBS/ppAL/hX5SmPJU= github.com/certifi/gocertifi v0.0.0-20210507211836-431795d63e8d h1:S2NE3iHSwP0XV47EEXL8mWmRdEfGscSJ+7EgePNgt0s= @@ -45,8 +43,10 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/urfave/cli/v2 v2.8.0 
h1:EZsAB20oRW4nHcB99TTL6PrXpBGIEujMEKdjwruY9KQ= -github.com/urfave/cli/v2 v2.8.0/go.mod h1:TYFbtzt/azQoJOrGH5mDfZtS0jIkl/OeFwlRWPR9KRM= +github.com/urfave/cli/v2 v2.8.1 h1:CGuYNZF9IKZY/rfBe3lJpccSoIY1ytfvmgQT90cNOl4= +github.com/urfave/cli/v2 v2.8.1/go.mod h1:Z41J9TPoffeoqP0Iza0YbAhGvymRdZAd2uPmZ5JxRdY= +github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= +github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/github.com/antzucaro/matchr/COPYING.txt b/vendor/github.com/antzucaro/matchr/COPYING.txt deleted file mode 100644 index 169aa50d..00000000 --- a/vendor/github.com/antzucaro/matchr/COPYING.txt +++ /dev/null @@ -1,19 +0,0 @@ -Matchr: an approximate string matching library for the Go programming language - -Copyright (C) 2013-2014 Ant Zucaro - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -You can contact Ant Zucaro at azucaro at gmail dot com. 
diff --git a/vendor/github.com/antzucaro/matchr/README.md b/vendor/github.com/antzucaro/matchr/README.md deleted file mode 100644 index 5aabfb1a..00000000 --- a/vendor/github.com/antzucaro/matchr/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# matchr - -An approximate string matching library for the [Go programming language](http://www.golang.org). - -## Rationale - -Data used in record linkage can often be of dubious quality. Typographical -errors or changing data elements (to name a few things) make establishing similarity between two sets of data -difficult. Rather than use exact string comparison in such situations, it is -vital to have a means to identify how similar two strings are. Similarity functions can cater -to certain data sets in order to make better matching decisions. The matchr library provides -several of these similarity functions. diff --git a/vendor/github.com/antzucaro/matchr/damerau_levenshtein.go b/vendor/github.com/antzucaro/matchr/damerau_levenshtein.go deleted file mode 100644 index 23c76cec..00000000 --- a/vendor/github.com/antzucaro/matchr/damerau_levenshtein.go +++ /dev/null @@ -1,112 +0,0 @@ -package matchr - -// DamerauLevenshtein computes the Damerau-Levenshtein distance between two -// strings. The returned value - distance - is the number of insertions, -// deletions, substitutions, and transpositions it takes to transform one -// string (s1) into another (s2). Each step in the transformation "costs" -// one distance point. It is similar to the Optimal String Alignment, -// algorithm, but is more complex because it allows multiple edits on -// substrings. -// -// This implementation is based off of the one found on Wikipedia at -// http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions -// as well as KevinStern's Java implementation found at -// https://github.com/KevinStern/software-and-algorithms. 
-func DamerauLevenshtein(s1 string, s2 string) (distance int) { - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - // the maximum possible distance - inf := len(r1) + len(r2) - - // if one string is blank, we needs insertions - // for all characters in the other one - if len(r1) == 0 { - return len(r2) - } - - if len(r2) == 0 { - return len(r1) - } - - // construct the edit-tracking matrix - matrix := make([][]int, len(r1)) - for i := range matrix { - matrix[i] = make([]int, len(r2)) - } - - // seen characters - seenRunes := make(map[rune]int) - - if r1[0] != r2[0] { - matrix[0][0] = 1 - } - - seenRunes[r1[0]] = 0 - for i := 1; i < len(r1); i++ { - deleteDist := matrix[i-1][0] + 1 - insertDist := (i+1)*1 + 1 - var matchDist int - if r1[i] == r2[0] { - matchDist = i - } else { - matchDist = i + 1 - } - matrix[i][0] = min(min(deleteDist, insertDist), matchDist) - } - - for j := 1; j < len(r2); j++ { - deleteDist := (j + 1) * 2 - insertDist := matrix[0][j-1] + 1 - var matchDist int - if r1[0] == r2[j] { - matchDist = j - } else { - matchDist = j + 1 - } - - matrix[0][j] = min(min(deleteDist, insertDist), matchDist) - } - - for i := 1; i < len(r1); i++ { - var maxSrcMatchIndex int - if r1[i] == r2[0] { - maxSrcMatchIndex = 0 - } else { - maxSrcMatchIndex = -1 - } - - for j := 1; j < len(r2); j++ { - swapIndex, ok := seenRunes[r2[j]] - jSwap := maxSrcMatchIndex - deleteDist := matrix[i-1][j] + 1 - insertDist := matrix[i][j-1] + 1 - matchDist := matrix[i-1][j-1] - if r1[i] != r2[j] { - matchDist += 1 - } else { - maxSrcMatchIndex = j - } - - // for transpositions - var swapDist int - if ok && jSwap != -1 { - iSwap := swapIndex - var preSwapCost int - if iSwap == 0 && jSwap == 0 { - preSwapCost = 0 - } else { - preSwapCost = matrix[maxI(0, iSwap-1)][maxI(0, jSwap-1)] - } - swapDist = i + j + preSwapCost - iSwap - jSwap - 1 - } else { - swapDist = inf - } - matrix[i][j] = min(min(min(deleteDist, insertDist), matchDist), swapDist) - } - 
seenRunes[r1[i]] = i - } - - return matrix[len(r1)-1][len(r2)-1] -} diff --git a/vendor/github.com/antzucaro/matchr/double_metaphone_corpus.txt.gz b/vendor/github.com/antzucaro/matchr/double_metaphone_corpus.txt.gz deleted file mode 100644 index 7902feab..00000000 Binary files a/vendor/github.com/antzucaro/matchr/double_metaphone_corpus.txt.gz and /dev/null differ diff --git a/vendor/github.com/antzucaro/matchr/hamming.go b/vendor/github.com/antzucaro/matchr/hamming.go deleted file mode 100644 index a6360754..00000000 --- a/vendor/github.com/antzucaro/matchr/hamming.go +++ /dev/null @@ -1,25 +0,0 @@ -package matchr - -import "errors" - -// Hamming computes the Hamming distance between two equal-length strings. -// This is the number of times the two strings differ between characters at -// the same index. This implementation is based off of the algorithm -// description found at http://en.wikipedia.org/wiki/Hamming_distance. -func Hamming(s1 string, s2 string) (distance int, err error) { - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - if len(r1) != len(r2) { - err = errors.New("Hamming distance of different sized strings.") - return - } - - for i, v := range r1 { - if r2[i] != v { - distance += 1 - } - } - return -} diff --git a/vendor/github.com/antzucaro/matchr/jarowinkler.go b/vendor/github.com/antzucaro/matchr/jarowinkler.go deleted file mode 100644 index 1e291b5c..00000000 --- a/vendor/github.com/antzucaro/matchr/jarowinkler.go +++ /dev/null @@ -1,135 +0,0 @@ -package matchr - -func jaroWinklerBase(s1 string, s2 string, - longTolerance bool, winklerize bool) (distance float64) { - - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - r1Length := len(r1) - r2Length := len(r2) - - if r1Length == 0 || r2Length == 0 { - return - } - - minLength := 0 - if r1Length > r2Length { - minLength = r1Length - } else { - minLength = r2Length - } - - searchRange := minLength - searchRange = (searchRange / 2) - 1 - if 
searchRange < 0 { - searchRange = 0 - } - var lowLim, hiLim, transCount, commonChars int - var i, j, k int - - r1Flag := make([]bool, r1Length+1) - r2Flag := make([]bool, r2Length+1) - - // find the common chars within the acceptable range - commonChars = 0 - for i, _ = range r1 { - if i >= searchRange { - lowLim = i - searchRange - } else { - lowLim = 0 - } - - if (i + searchRange) <= (r2Length - 1) { - hiLim = i + searchRange - } else { - hiLim = r2Length - 1 - } - - for j := lowLim; j <= hiLim; j++ { - if !r2Flag[j] && r2[j] == r1[i] { - r2Flag[j] = true - r1Flag[i] = true - commonChars++ - - break - } - } - } - - // if we have nothing in common at this point, nothing else can be done - if commonChars == 0 { - return - } - - // otherwise we count the transpositions - k = 0 - transCount = 0 - for i, _ := range r1 { - if r1Flag[i] { - for j = k; j < r2Length; j++ { - if r2Flag[j] { - k = j + 1 - break - } - } - if r1[i] != r2[j] { - transCount++ - } - } - } - transCount /= 2 - - // adjust for similarities in nonmatched characters - distance = float64(commonChars)/float64(r1Length) + - float64(commonChars)/float64(r2Length) + - (float64(commonChars-transCount))/float64(commonChars) - distance /= 3.0 - - // give more weight to already-similar strings - if winklerize && distance > 0.7 { - - // the first 4 characters in common - if minLength >= 4 { - j = 4 - } else { - j = minLength - } - - for i = 0; i < j && len(r1) > i && len(r2) > i && r1[i] == r2[i] && nan(r1[i]); i++ { - } - - if i > 0 { - distance += float64(i) * 0.1 * (1.0 - distance) - } - - if longTolerance && (minLength > 4) && (commonChars > i+1) && - (2*commonChars >= minLength+i) { - if nan(r1[0]) { - distance += (1.0 - distance) * (float64(commonChars-i-1) / - (float64(r1Length) + float64(r2Length) - float64(i*2) + 2)) - } - } - } - - return -} - -// Jaro computes the Jaro edit distance between two strings. 
It represents -// this with a float64 between 0 and 1 inclusive, with 0 indicating the two -// strings are not at all similar and 1 indicating the two strings are exact -// matches. -// -// See http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance for a -// full description. -func Jaro(r1 string, r2 string) (distance float64) { - return jaroWinklerBase(r1, r2, false, false) -} - -// JaroWinkler computes the Jaro-Winkler edit distance between two strings. -// This is a modification of the Jaro algorithm that gives additional weight -// to prefix matches. -func JaroWinkler(r1 string, r2 string, longTolerance bool) (distance float64) { - return jaroWinklerBase(r1, r2, longTolerance, true) -} diff --git a/vendor/github.com/antzucaro/matchr/levenshtein.go b/vendor/github.com/antzucaro/matchr/levenshtein.go deleted file mode 100644 index 5f3ad0f8..00000000 --- a/vendor/github.com/antzucaro/matchr/levenshtein.go +++ /dev/null @@ -1,48 +0,0 @@ -package matchr - -// Levenshtein computes the Levenshtein distance between two -// strings. The returned value - distance - is the number of insertions, -// deletions, and substitutions it takes to transform one -// string (s1) into another (s2). Each step in the transformation "costs" -// one distance point. 
-func Levenshtein(s1 string, s2 string) (distance int) { - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - rows := len(r1) + 1 - cols := len(r2) + 1 - - var d1 int - var d2 int - var d3 int - var i int - var j int - dist := make([]int, rows*cols) - - for i = 0; i < rows; i++ { - dist[i*cols] = i - } - - for j = 0; j < cols; j++ { - dist[j] = j - } - - for j = 1; j < cols; j++ { - for i = 1; i < rows; i++ { - if r1[i-1] == r2[j-1] { - dist[(i*cols)+j] = dist[((i-1)*cols)+(j-1)] - } else { - d1 = dist[((i-1)*cols)+j] + 1 - d2 = dist[(i*cols)+(j-1)] + 1 - d3 = dist[((i-1)*cols)+(j-1)] + 1 - - dist[(i*cols)+j] = min(d1, min(d2, d3)) - } - } - } - - distance = dist[(cols*rows)-1] - - return -} diff --git a/vendor/github.com/antzucaro/matchr/longestcommonsubsequence.go b/vendor/github.com/antzucaro/matchr/longestcommonsubsequence.go deleted file mode 100644 index 97c1224e..00000000 --- a/vendor/github.com/antzucaro/matchr/longestcommonsubsequence.go +++ /dev/null @@ -1,30 +0,0 @@ -package matchr - -// LongestCommonSubsequence computes the longest substring -// between two strings. The returned value is the length -// of the substring, which contains letters from both -// strings, while maintaining the order of the letters. 
-func LongestCommonSubsequence(s1, s2 string) int { - r1 := []rune(s1) - r2 := []rune(s2) - table := make([][]int, len(s1)+1) - - // Construct 2D table - for i := range table { - table[i] = make([]int, len(s2)+1) - } - - var i int - var j int - - for i = len(r1) - 1; i >= 0; i-- { - for j = len(r2) - 1; j >= 0; j-- { - if r1[i] == r2[j] { - table[i][j] = 1 + table[i+1][j+1] - } else { - table[i][j] = maxI(table[i+1][j], table[i][j+1]) - } - } - } - return table[0][0] -} diff --git a/vendor/github.com/antzucaro/matchr/metaphone.go b/vendor/github.com/antzucaro/matchr/metaphone.go deleted file mode 100644 index 484d0f4d..00000000 --- a/vendor/github.com/antzucaro/matchr/metaphone.go +++ /dev/null @@ -1,721 +0,0 @@ -package matchr - -import ( - "bytes" - "strings" -) - -type metaphoneresult struct { - // the maximum number of code values to calculate - maxLength int - - // whether to calculate an alternate - calcAlternate bool - - // no direct modifications - only through add() - primary bytes.Buffer - alternate bytes.Buffer - - // length of the private buffers - PrimaryLength int - AlternateLength int -} - -func newMetaphoneresult(maxLength int, calcAlternate bool) (r *metaphoneresult) { - r = &metaphoneresult{maxLength: maxLength, calcAlternate: calcAlternate} - return -} - -func (r *metaphoneresult) add(c1 string, c2 string) { - if c1 != "" { - r.primary.WriteString(c1) - r.PrimaryLength += len(c1) - } - - if c2 != "" && r.calcAlternate { - r.alternate.WriteString(c2) - r.AlternateLength += len(c2) - } -} - -func (r *metaphoneresult) isComplete() bool { - return r.PrimaryLength >= r.maxLength && r.AlternateLength >= r.maxLength -} - -func (r *metaphoneresult) result() (primary string, alternate string) { - primary = r.primary.String() - if len(primary) > r.maxLength { - primary = primary[0:r.maxLength] - } - alternate = r.alternate.String() - if len(alternate) > r.maxLength { - alternate = alternate[0:r.maxLength] - } - return -} - -// utility functions for 
checking things within a string -func isSlavoGermanic(value string) bool { - return strings.Contains(value, "W") || strings.Contains(value, "K") || - strings.Contains(value, "CZ") || strings.Contains(value, "WITZ") -} - -func isSilentStart(input runestring) bool { - SILENT_START := [...]string{"GN", "KN", "PN", "WR", "PS"} - - prefix := input.SafeSubstr(0, 2) - - for _, criteria := range SILENT_START { - if prefix == criteria { - return true - } - } - - return false -} - -func handleVowel(result *metaphoneresult, index int) int { - if index == 0 { - result.add("A", "A") - } - - return index + 1 -} - -/****************************************************************************** - * Entry handlers for letters. - *****************************************************************************/ -func handleC(input runestring, result *metaphoneresult, index int) int { - if conditionC0(input, index) { - result.add("K", "K") - index += 2 - } else if index == 0 && input.Contains(index, 6, "CAESAR") { - result.add("S", "S") - index += 2 - } else if input.Contains(index, 2, "CH") { - index = handleCH(input, result, index) - } else if input.Contains(index, 2, "CZ") && - !input.Contains(index-2, 4, "WICZ") { - result.add("S", "X") - index += 2 - } else if input.Contains(index+1, 3, "CIA") { - result.add("X", "X") - index += 3 - } else if input.Contains(index, 2, "CC") && - !(index == 1 && input.SafeAt(0) == 'M') { - return handleCC(input, result, index) - } else if input.Contains(index, 2, "CK") || - input.Contains(index, 2, "CG") || - input.Contains(index, 2, "CQ") { - result.add("K", "K") - index += 2 - } else if input.Contains(index, 2, "CI") || - input.Contains(index, 2, "CE") || - input.Contains(index, 2, "CY") { - if input.Contains(index, 3, "CIO") || - input.Contains(index, 3, "CIE") || - input.Contains(index, 3, "CIA") { - result.add("S", "X") - } else { - result.add("S", "S") - } - index += 2 - } else { - result.add("K", "K") - if input.Contains(index+1, 2, " C") || - 
input.Contains(index+1, 2, " Q") || - input.Contains(index+1, 2, " G") { - index += 3 - } else if (input.Contains(index+1, 1, "C") || - input.Contains(index+1, 1, "K") || - input.Contains(index+1, 1, "Q")) && - !(input.Contains(index+1, 2, "CE") || - input.Contains(index+1, 2, "CI")) { - index += 2 - } else { - index++ - } - } - - return index -} - -func handleCC(input runestring, result *metaphoneresult, index int) int { - if input.Contains(index+2, 1, "I", "E", "H") && - !input.Contains(index+2, 2, "HU") { - if (index == 1 && input.SafeAt(index-1) == 'A') || - (input.Contains(index-1, 5, "UCCEE", "UCCES")) { - result.add("KS", "KS") - } else { - result.add("X", "X") - } - index += 3 - } else { - result.add("K", "K") - index += 2 - } - return index -} - -func handleCH(input runestring, result *metaphoneresult, index int) int { - if index > 0 && input.Contains(index, 4, "CHAE") { - result.add("K", "X") - return index + 2 - } else if conditionCH0(input, index) { - result.add("K", "K") - return index + 2 - // TODO: combine this condition with the one above? 
- } else if conditionCH1(input, index) { - result.add("K", "K") - return index + 2 - } else { - if index > 0 { - if input.Contains(0, 2, "MC") { - result.add("K", "K") - } else { - result.add("X", "K") - } - } else { - result.add("X", "X") - } - return index + 2 - } -} - -func handleD(input runestring, result *metaphoneresult, index int) int { - if input.Contains(index, 2, "DG") { - if input.Contains(index+2, 1, "I", "E", "Y") { - result.add("J", "J") - index += 3 - } else { - result.add("TK", "TK") - index += 2 - } - } else if input.Contains(index, 2, "DT", "DD") { - result.add("T", "T") - index += 2 - } else { - result.add("T", "T") - index++ - } - return index -} - -func handleG(input runestring, result *metaphoneresult, index int, slavoGermanic bool) int { - if input.SafeAt(index+1) == 'H' { - index = handleGH(input, result, index) - } else if input.SafeAt(index+1) == 'N' { - if index == 1 && isVowel(input.SafeAt(0)) && !slavoGermanic { - result.add("KN", "N") - } else if !input.Contains(index+2, 2, "EY") && input.SafeAt(index+1) != 'Y' && !slavoGermanic { - result.add("N", "KN") - } else { - result.add("KN", "KN") - } - index += 2 - } else if input.Contains(index+1, 2, "LI") && !slavoGermanic { - result.add("KL", "L") - index += 2 - } else if index == 0 && (input.SafeAt(index+1) == 'Y' || - input.Contains(index+1, 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER")) { - result.add("K", "J") - index += 2 - } else if (input.Contains(index+1, 2, "ER") || - input.SafeAt(index+1) == 'Y') && - !input.Contains(0, 6, "DANGER", "RANGER", "MANGER") && - !input.Contains(index-1, 1, "E", "I") && - !input.Contains(index-1, 3, "RGY", "OGY") { - result.add("K", "J") - index += 2 - } else if input.Contains(index+1, 1, "E", "I", "Y") || - input.Contains(index-1, 4, "AGGI", "OGGI") { - if input.Contains(0, 4, "VAN ", "VON ") || - input.Contains(0, 3, "SCH") || - input.Contains(index+1, 2, "ET") { - result.add("K", "K") - } else if input.Contains(index+1, 3, 
"IER") { - result.add("J", "J") - } else { - result.add("J", "K") - } - index += 2 - } else if input.SafeAt(index+1) == 'G' { - result.add("K", "K") - index += 2 - } else { - result.add("K", "K") - index++ - } - return index -} - -func handleGH(input runestring, result *metaphoneresult, index int) int { - if index > 0 && !isVowel(input.SafeAt(index-1)) { - result.add("K", "K") - index += 2 - } else if index == 0 { - if input.SafeAt(index+2) == 'I' { - result.add("J", "J") - } else { - result.add("K", "K") - } - index += 2 - } else if (index > 1 && input.Contains(index-2, 1, "B", "H", "D")) || - (index > 2 && input.Contains(index-3, 1, "B", "H", "D")) || - (index > 3 && input.Contains(index-4, 1, "B", "H")) { - index += 2 - } else { - if index > 2 && input.SafeAt(index-1) == 'U' && - input.Contains(index-3, 1, "C", "G", "L", "R", "T") { - result.add("F", "F") - } else if index > 0 && input.SafeAt(index-1) != 'I' { - result.add("K", "K") - } - index += 2 - } - return index -} - -func handleH(input runestring, result *metaphoneresult, index int) int { - if (index == 0 || isVowel(input.SafeAt(index-1))) && - isVowel(input.SafeAt(index+1)) { - result.add("H", "H") - index += 2 - } else { - index++ - } - return index -} - -func handleJ(input runestring, result *metaphoneresult, index int, slavoGermanic bool) int { - if input.Contains(index, 4, "JOSE") || input.Contains(0, 4, "SAN ") { - if (index == 0 && (input.SafeAt(index+4) == ' ') || - len(input) == 4) || input.Contains(0, 4, "SAN ") { - result.add("H", "H") - } else { - result.add("J", "H") - } - index++ - } else { - if index == 0 && !input.Contains(index, 4, "JOSE") { - result.add("J", "A") - } else if isVowel(input.SafeAt(index-1)) && !slavoGermanic && - (input.SafeAt(index+1) == 'A' || input.SafeAt(index+1) == 'O') { - result.add("J", "H") - } else if index == (len(input) - 1) { - result.add("J", " ") - } else if !input.Contains(index+1, 1, - "L", "T", "K", "S", "N", "M", "B", "Z") && - !input.Contains(index-1, 
1, "S", "K", "L") { - result.add("J", "J") - } - - if input.SafeAt(index+1) == 'J' { - index += 2 - } else { - index++ - } - } - return index -} - -func handleL(input runestring, result *metaphoneresult, index int) int { - if input.SafeAt(index+1) == 'L' { - if conditionL0(input, index) { - result.add("L", "") - } else { - result.add("L", "L") - } - index += 2 - } else { - result.add("L", "L") - index++ - } - return index -} - -func handleP(input runestring, result *metaphoneresult, index int) int { - if input.SafeAt(index+1) == 'H' { - result.add("F", "F") - index += 2 - } else { - result.add("P", "P") - if input.Contains(index+1, 1, "P", "B") { - index += 2 - } else { - index++ - } - } - return index -} - -func handleR(input runestring, result *metaphoneresult, index int, slavoGermanic bool) int { - if index == (len(input)-1) && !slavoGermanic && - input.Contains(index-2, 2, "IE") && - !input.Contains(index-4, 2, "ME", "MA") { - result.add("", "R") - } else { - result.add("R", "R") - } - - if input.SafeAt(index+1) == 'R' { - index += 2 - } else { - index++ - } - return index -} - -func handleS(input runestring, result *metaphoneresult, index int, slavoGermanic bool) int { - if input.Contains(index-1, 3, "ISL", "YSL") { - index++ - } else if index == 0 && input.Contains(index, 5, "SUGAR") { - result.add("X", "S") - index++ - } else if input.Contains(index, 2, "SH") { - if input.Contains(index+1, 4, "HEIM", "HOEK", "HOLM", "HOLZ") { - result.add("S", "S") - } else { - result.add("X", "X") - } - index += 2 - } else if input.Contains(index, 3, "SIO", "SIA") || - input.Contains(index, 4, "SIAN") { - if slavoGermanic { - result.add("S", "S") - } else { - result.add("S", "X") - } - index += 3 - } else if (index == 0 && input.Contains(index+1, 1, "M", "N", "L", "W")) || - input.Contains(index+1, 1, "Z") { - result.add("S", "X") - if input.Contains(index+1, 1, "Z") { - index += 2 - } else { - index++ - } - } else if input.Contains(index, 2, "SC") { - index = 
handleSC(input, result, index) - } else { - if index == len(input)-1 && - input.Contains(index-2, 2, "AI", "OI") { - result.add("", "S") - } else { - result.add("S", "S") - } - - if input.Contains(index+1, 1, "S", "Z") { - index += 2 - } else { - index++ - } - } - return index -} - -func handleSC(input runestring, result *metaphoneresult, index int) int { - if input.SafeAt(index+2) == 'H' { - if input.Contains(index+3, 2, "OO", "ER", "EN", "UY", "ED", "EM") { - if input.Contains(index+3, 2, "ER", "EN") { - result.add("X", "SK") - } else { - result.add("SK", "SK") - } - } else { - if index == 0 && !isVowel(input.SafeAt(3)) && input.SafeAt(3) != 'W' { - result.add("X", "S") - } else { - result.add("X", "X") - } - } - } else if input.Contains(index+2, 1, "I", "E", "Y") { - result.add("S", "S") - } else { - result.add("SK", "SK") - } - index += 3 - - return index -} - -func handleT(input runestring, result *metaphoneresult, index int) int { - if input.Contains(index, 4, "TION") { - result.add("X", "X") - index += 3 - } else if input.Contains(index, 3, "TIA", "TCH") { - result.add("X", "X") - index += 3 - } else if input.Contains(index, 2, "TH") || input.Contains(index, 3, "TTH") { - if input.Contains(index+2, 2, "OM", "AM") || - input.Contains(0, 4, "VAN ", "VON ") || - input.Contains(0, 3, "SCH") { - result.add("T", "T") - } else { - result.add("0", "T") - } - index += 2 - } else { - result.add("T", "T") - if input.Contains(index+1, 1, "T", "D") { - index += 2 - } else { - index++ - } - } - return index -} - -func handleW(input runestring, result *metaphoneresult, index int) int { - if input.Contains(index, 2, "WR") { - result.add("R", "R") - index += 2 - } else { - if index == 0 && (isVowel(input.SafeAt(index+1)) || - input.Contains(index, 2, "WH")) { - if isVowel(input.SafeAt(index + 1)) { - result.add("A", "F") - } else { - result.add("A", "A") - } - index++ - } else if (index == len(input)-1 && isVowel(input.SafeAt(index-1))) || - input.Contains(index-1, 5, 
"EWSKI", "EWSKY", "OWSKI", "OWSKY") || - input.Contains(0, 3, "SCH") { - result.add("", "F") - index++ - } else if input.Contains(index, 4, "WICZ", "WITZ") { - result.add("TS", "FX") - index += 4 - } else { - index++ - } - } - return index -} - -func handleX(input runestring, result *metaphoneresult, index int) int { - if index == 0 { - result.add("S", "S") - index++ - } else { - if !((index == len(input)-1) && - (input.Contains(index-3, 3, "IAU", "EAU") || - input.Contains(index-2, 2, "AU", "OU"))) { - result.add("KS", "KS") - } - - if input.Contains(index+1, 1, "C", "X") { - index += 2 - } else { - index++ - } - } - return index -} - -func handleZ(input runestring, result *metaphoneresult, index int, slavoGermanic bool) int { - if input.SafeAt(index+1) == 'H' { - result.add("J", "J") - } else { - if input.Contains(index+1, 2, "ZO", "ZI", "ZA") || - (slavoGermanic && (index > 0 && input.SafeAt(index-1) != 'T')) { - result.add("S", "TS") - } else { - result.add("S", "S") - } - } - - if input.SafeAt(index+1) == 'Z' { - index += 2 - } else { - index++ - } - return index -} - -/****************************************************************************** - * Complex conditional handlers for letters - *****************************************************************************/ -func conditionC0(input runestring, index int) bool { - if input.Contains(index, 4, "CHIA") { - return true - } else if index <= 1 { - return false - } else if isVowel(input.SafeAt(index - 2)) { - return false - } else if !input.Contains(index-1, 3, "ACH") { - return false - } else { - c := input.SafeAt(index + 2) - return (c != 'I' && c != 'E') || - (input.Contains(index-2, 6, "BACHER") || - input.Contains(index-2, 6, "MACHER")) - } -} - -func conditionCH0(input runestring, index int) bool { - if index != 0 { - return false - } else if !input.Contains(index+1, 5, "HARAC", "HARIS") && - !input.Contains(index+1, 3, "HOR", "HYM", "HIA", "HEM") { - return false - } else if input.Contains(0, 5, 
"CHORE") { - return false - } else { - return true - } -} - -func conditionCH1(input runestring, index int) bool { - // good god this is ugly - return (input.Contains(0, 4, "VAN ", "VON ") || input.Contains(0, 3, "SCH")) || - input.Contains(index-2, 6, "ORCHES", "ARCHIT", "ORCHID") || - input.Contains(index+2, 1, "T", "S") || - ((input.Contains(index-1, 1, "A", "O", "U", "E") || index == 0) && - (input.Contains(index+2, 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ") || - index+1 == len(input)-1)) -} - -func conditionL0(input runestring, index int) bool { - if index == (len(input)-3) && - input.Contains(index-1, 4, "ILLO", "ILLA", "ALLE") { - return true - } else if (input.Contains(len(input)-2, 2, "AS", "OS") || - input.Contains(len(input)-1, 1, "A", "O")) && - (input.Contains(index-1, 4, "ALLE")) { - return true - } else { - return false - } -} - -func conditionM0(input runestring, index int) bool { - if input.SafeAt(index+1) == 'M' { - return true - } - - return input.Contains(index-1, 3, "UMB") && - ((index+1) == (len(input)-1) || - input.Contains(index+2, 2, "ER")) -} - -// DoubleMetaphone computes the Double-Metaphone value of the input string. -// This value is a phonetic representation of how the string sounds, with -// affordances for many different language dialects. It was originally -// developed by Lawrence Phillips in the 1990s. -// -// More information about this algorithm can be found on Wikipedia at -// http://en.wikipedia.org/wiki/Metaphone. 
-func DoubleMetaphone(s1 string) (string, string) { - // trim, upper space - s1 = cleanInput(s1) - - // structure to traverse the string by code point, not byte - input := runestring(s1) - - slavoGermanic := isSlavoGermanic(s1) - - // where we are in the string - index := 0 - - if isSilentStart(input) { - index += 1 - } - - result := newMetaphoneresult(4, true) - - for !result.isComplete() && index <= len(input)-1 { - c := rune(input.SafeAt(index)) - switch c { - case 'A', 'E', 'I', 'O', 'U', 'Y': - index = handleVowel(result, index) - case 'B': - result.add("P", "P") - if input.SafeAt(index+1) == 'B' { - index += 2 - } else { - index++ - } - case 'Ç': - result.add("S", "S") - index++ - case 'C': - index = handleC(input, result, index) - case 'D': - index = handleD(input, result, index) - case 'F': - result.add("F", "F") - if input.SafeAt(index+1) == 'F' { - index += 2 - } else { - index++ - } - case 'G': - index = handleG(input, result, index, slavoGermanic) - case 'H': - index = handleH(input, result, index) - case 'J': - index = handleJ(input, result, index, slavoGermanic) - case 'K': - result.add("K", "K") - if input.SafeAt(index+1) == 'K' { - index += 2 - } else { - index++ - } - case 'L': - index = handleL(input, result, index) - case 'M': - result.add("M", "M") - if conditionM0(input, index) { - index += 2 - } else { - index++ - } - case 'N': - result.add("N", "N") - if input.SafeAt(index+1) == 'N' { - index += 2 - } else { - index++ - } - case 'Ñ': - result.add("N", "N") - index++ - case 'P': - index = handleP(input, result, index) - case 'Q': - result.add("K", "K") - if input.SafeAt(index+1) == 'Q' { - index += 2 - } else { - index++ - } - case 'R': - index = handleR(input, result, index, slavoGermanic) - case 'S': - index = handleS(input, result, index, slavoGermanic) - case 'T': - index = handleT(input, result, index) - case 'V': - result.add("F", "F") - if input.SafeAt(index+1) == 'V' { - index += 2 - } else { - index++ - } - case 'W': - index = 
handleW(input, result, index) - case 'X': - index = handleX(input, result, index) - case 'Z': - index = handleZ(input, result, index, slavoGermanic) - default: - index++ - } - - } - - return result.result() -} diff --git a/vendor/github.com/antzucaro/matchr/nysiis.go b/vendor/github.com/antzucaro/matchr/nysiis.go deleted file mode 100644 index bbf699d5..00000000 --- a/vendor/github.com/antzucaro/matchr/nysiis.go +++ /dev/null @@ -1,156 +0,0 @@ -package matchr - -// NYSIIS computes the NYSIIS phonetic encoding of the input string. It is a -// modification of the traditional Soundex algorithm. -func NYSIIS(s1 string) string { - cleans1 := runestring(cleanInput(s1)) - input := runestring(make([]rune, 0, len(s1))) - - // The output can't be larger than the string itself - output := runestring(make([]rune, 0, len(s1))) - - // 0. Remove all non-ASCII characters - for _, v := range cleans1 { - if v >= 65 && v <= 90 { - input = append(input, v) - } - } - - if len(input) == 0 { - return "" - } - - // 1. Transcoding first characters - switch input[0] { - case 'M': - if input.SafeSubstr(0, 3) == "MAC" { - // MAC -> MCC - input[1] = 'C' - } - case 'K': - if input.SafeSubstr(0, 2) == "KN" { - // KN -> NN - input[0] = 'N' - } else { - // K -> C - input[0] = 'C' - } - case 'P': - next := input.SafeAt(1) - if next == 'H' { - // PH -> FF - input[0] = 'F' - input[1] = 'F' - } else if next == 'F' { - // PF -> FF - input[0] = 'F' - } - case 'S': - if input.SafeSubstr(0, 3) == "SCH" { - input[1] = 'S' - input[2] = 'S' - } - } - - // 2. Transcoding last characters - switch input.SafeSubstr(len(input)-2, 2) { - case "EE", "IE": - // EE, IE -> Y - input.Del(len(input) - 2) - input[len(input)-1] = 'Y' - case "DT", "RT", "RD", "NT", "ND": - // DT, RT, RD, NT, ND -> D - input.Del(len(input) - 2) - input[len(input)-1] = 'D' - } - - // 3. 
First character of key = first character of name - output = append(output, input[0]) - last := input[0] - - for i := 1; i < len(input); i++ { - c := input[i] - switch c { - case 'A', 'I', 'O', 'U': - // A, E, I, O, U -> A (E is separate) - input[i] = 'A' - case 'E': - // EV -> AF, else A - if input.SafeAt(i+1) == 'V' { - input[i+1] = 'F' - } - input[i] = 'A' - case 'Q': - // Q -> G - input[i] = 'G' - case 'Z': - // Z -> S - input[i] = 'S' - case 'M': - // M -> N - input[i] = 'N' - case 'K': - // KN -> N, else K -> C - if input.SafeAt(i+1) == 'N' { - input.Del(i) - } else { - input[i] = 'C' - } - case 'S': - // SCH -> SSS - if input.SafeSubstr(i, 3) == "SCH" { - input[i+1] = 'S' - input[i+2] = 'S' - } - case 'P': - // PH -> FF - if input.SafeAt(i+1) == 'H' { - input[i] = 'F' - input[i+1] = 'F' - } - case 'H': - // H -> $(previous character) if previous character or - // next character is a non-vowel - prev := input.SafeAt(i - 1) - next := input.SafeAt(i + 1) - if !isVowelNoY(prev) || !isVowelNoY(next) { - input[i] = prev - } - case 'W': - prev := input.SafeAt(i - 1) - if isVowelNoY(prev) { - input[i] = prev - } - } - - if input[i] != last && input[i] != 0 { - output = append(output, input[i]) - } - last = input[i] - } - - // have to be careful here because we've already added the first - // key value - if len(output) > 1 { - // remove trailing s - if output.SafeAt(len(output)-1) == 'S' { - output.Del(len(output) - 1) - } - - // trailing AY -> Y - if len(output) > 2 && output.SafeSubstr(len(output)-2, 2) == "AY" { - output.Del(len(output) - 2) - } - - // trailing A -> remove it - if output.SafeAt(len(output)-1) == 'A' { - output.Del(len(output) - 1) - } - } - - if len(output) > 6 { - return string(output[0:6]) - } else { - return string(output) - } -} diff --git a/vendor/github.com/antzucaro/matchr/osa.go b/vendor/github.com/antzucaro/matchr/osa.go deleted file mode 100644 index 8e5de2ee..00000000 --- a/vendor/github.com/antzucaro/matchr/osa.go +++ /dev/null @@ -1,56 
+0,0 @@ -package matchr - -// OSA computes the Optimal String Alignment distance between two -// strings. The returned value - distance - is the number of insertions, -// deletions, substitutions, and transpositions it takes to transform one -// string (s1) into another (s2). Each step in the transformation "costs" -// one distance point. It is similar to Damerau-Levenshtein, but is simpler -// because it does not allow multiple edits on any substring. -func OSA(s1 string, s2 string) (distance int) { - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - rows := len(r1) + 1 - cols := len(r2) + 1 - - var i, j, d1, d2, d3, d_now, cost int - - dist := make([]int, rows*cols) - - for i = 0; i < rows; i++ { - dist[i*cols] = i - } - - for j = 0; j < cols; j++ { - dist[j] = j - } - - for i = 1; i < rows; i++ { - for j = 1; j < cols; j++ { - if r1[i-1] == r2[j-1] { - cost = 0 - } else { - cost = 1 - } - - d1 = dist[((i-1)*cols)+j] + 1 - d2 = dist[(i*cols)+(j-1)] + 1 - d3 = dist[((i-1)*cols)+(j-1)] + cost - - d_now = min(d1, min(d2, d3)) - - if i > 2 && j > 2 && r1[i-1] == r2[j-2] && - r1[i-2] == r2[j-1] { - d1 = dist[((i-2)*cols)+(j-2)] + cost - d_now = min(d_now, d1) - } - - dist[(i*cols)+j] = d_now - } - } - - distance = dist[(cols*rows)-1] - - return -} diff --git a/vendor/github.com/antzucaro/matchr/phonex.go b/vendor/github.com/antzucaro/matchr/phonex.go deleted file mode 100644 index 78f4982a..00000000 --- a/vendor/github.com/antzucaro/matchr/phonex.go +++ /dev/null @@ -1,128 +0,0 @@ -package matchr - -func preProcess(input []rune) []rune { - output := runestring(make([]rune, 0, len(input))) - - // 0. Remove all non-ASCII characters - for _, v := range input { - if v >= 65 && v <= 90 { - output = append(output, v) - } - } - - // 1. Remove all trailing 'S' characters at the end of the name - for i := len(output) - 1; i >= 0 && output[i] == 'S'; i-- { - output.Del(i) - } - - // 2. 
Convert leading letter pairs as follows - // KN -> N, PH -> F, WR -> R - switch output.SafeSubstr(0, 2) { - case "KN": - output = output[1:] - case "PH": - output[0] = 'F' // H will be ignored anyway - case "WR": - output = output[1:] - } - - // 3a. Convert leading single letters as follows: - // H -> Remove - if output.SafeAt(0) == 'H' { - output = output[1:] - } - - // 3a. Convert leading single letters as follows: - // E,I,O,U,Y -> A - // P -> B - // V -> F - // K,Q -> C - // J -> G - // Z -> S - switch output.SafeAt(0) { - case 'E', 'I', 'O', 'U', 'Y': - output[0] = 'A' - case 'P': - output[0] = 'B' - case 'V': - output[0] = 'F' - case 'K', 'Q': - output[0] = 'C' - case 'J': - output[0] = 'G' - case 'Z': - output[0] = 'S' - } - - return output -} - -// Phonex computes the Phonex phonetic encoding of the input string. Phonex is -// a modification of the venerable Soundex algorithm. It accounts for a few -// more letter combinations to improve accuracy on some data sets. -// -// This implementation is based off of the original C implementation by the -// creator - A. J. Lait - as found in his research paper entitled "An -// Assessment of Name Matching Algorithms." 
-func Phonex(s1 string) string { - - // preprocess - s1 = cleanInput(s1) - - input := runestring(preProcess([]rune(s1))) - - result := make([]rune, 0, len(input)) - - last := rune(0) - code := rune(0) - for i := 0; i < len(input) && - input[i] != ' ' && - input[i] != ',' && - len(result) < 4; i++ { - switch input[i] { - case 'B', 'P', 'F', 'V': - code = '1' - case 'C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z': - code = '2' - case 'D', 'T': - if input.SafeAt(i+1) != 'C' { - code = '3' - } - case 'L': - if isVowel(input.SafeAt(i+1)) || i == len(input)-1 { - code = '4' - } - case 'M', 'N': - nextChar := input.SafeAt(i + 1) - if nextChar == 'D' || nextChar == 'G' { - // ignore next character - i++ - } - code = '5' - case 'R': - if isVowel(input.SafeAt(i+1)) || i == len(input)-1 { - code = '6' - } - default: - code = 0 - } - - if last != code && code != 0 && i != 0 { - result = append(result, code) - } - - // special case for 1st character: we use the actual character - if i == 0 { - result = append(result, input[i]) - last = code - } else { - last = result[len(result)-1] - } - } - - for len(result) < 4 { - result = append(result, '0') - } - - return string(result) -} diff --git a/vendor/github.com/antzucaro/matchr/runestring.go b/vendor/github.com/antzucaro/matchr/runestring.go deleted file mode 100644 index 41af3da3..00000000 --- a/vendor/github.com/antzucaro/matchr/runestring.go +++ /dev/null @@ -1,44 +0,0 @@ -package matchr - -type runestring []rune - -// A safe way to index a runestring. It will return a null rune if you try -// to index outside of the bounds of the runestring. -func (r *runestring) SafeAt(pos int) rune { - if pos < 0 || pos >= len(*r) { - return 0 - } else { - return (*r)[pos] - } -} - -// A safe way to obtain a substring of a runestring. It will return a null -// string ("") if you index somewhere outside its bounds. 
-func (r *runestring) SafeSubstr(pos int, length int) string { - if pos < 0 || pos > len(*r) || (pos+length) > len(*r) { - return "" - } else { - return string((*r)[pos : pos+length]) - } -} - -// Delete characters at positions pos. It will do nothing if you provide -// an index outside the bounds of the runestring. -func (r *runestring) Del(pos ...int) { - for _, i := range pos { - if i >= 0 && i <= len(*r) { - *r = append((*r)[:i], (*r)[i+1:]...) - } - } -} - -// A helper to determine if any substrings exist within the given runestring. -func (r *runestring) Contains(start int, length int, criteria ...string) bool { - substring := r.SafeSubstr(start, length) - for _, c := range criteria { - if substring == c { - return true - } - } - return false -} diff --git a/vendor/github.com/antzucaro/matchr/smithwaterman.go b/vendor/github.com/antzucaro/matchr/smithwaterman.go deleted file mode 100644 index 161d86b5..00000000 --- a/vendor/github.com/antzucaro/matchr/smithwaterman.go +++ /dev/null @@ -1,87 +0,0 @@ -package matchr - -const GAP_COST = float64(0.5) - -func getCost(r1 []rune, r1Index int, r2 []rune, r2Index int) float64 { - if r1[r1Index] == r2[r2Index] { - return 1.0 - } else { - return -2.0 - } -} - -// SmithWaterman computes the Smith-Waterman local sequence alignment for the -// two input strings. This was originally designed to find similar regions in -// strings representing DNA or protein sequences. 
-func SmithWaterman(s1 string, s2 string) float64 { - var cost float64 - - // index by code point, not byte - r1 := []rune(s1) - r2 := []rune(s2) - - r1Len := len(r1) - r2Len := len(r2) - - if r1Len == 0 { - return float64(r2Len) - } - - if r2Len == 0 { - return float64(r1Len) - } - - d := make([][]float64, r1Len) - for i := range d { - d[i] = make([]float64, r2Len) - } - - var maxSoFar float64 - for i := 0; i < r1Len; i++ { - // substitution cost - cost = getCost(r1, i, r2, 0) - if i == 0 { - d[0][0] = max(0.0, max(-GAP_COST, cost)) - } else { - d[i][0] = max(0.0, max(d[i-1][0]-GAP_COST, cost)) - } - - // save if it is the biggest thus far - if d[i][0] > maxSoFar { - maxSoFar = d[i][0] - } - } - - for j := 0; j < r2Len; j++ { - // substitution cost - cost = getCost(r1, 0, r2, j) - if j == 0 { - d[0][0] = max(0, max(-GAP_COST, cost)) - } else { - d[0][j] = max(0, max(d[0][j-1]-GAP_COST, cost)) - } - - // save if it is the biggest thus far - if d[0][j] > maxSoFar { - maxSoFar = d[0][j] - } - } - - for i := 1; i < r1Len; i++ { - for j := 1; j < r2Len; j++ { - cost = getCost(r1, i, r2, j) - - // find the lowest cost - d[i][j] = max( - max(0, d[i-1][j]-GAP_COST), - max(d[i][j-1]-GAP_COST, d[i-1][j-1]+cost)) - - // save if it is the biggest thus far - if d[i][j] > maxSoFar { - maxSoFar = d[i][j] - } - } - } - - return maxSoFar -} diff --git a/vendor/github.com/antzucaro/matchr/soundex.go b/vendor/github.com/antzucaro/matchr/soundex.go deleted file mode 100644 index 919eda8a..00000000 --- a/vendor/github.com/antzucaro/matchr/soundex.go +++ /dev/null @@ -1,72 +0,0 @@ -package matchr - -import "strings" - -// Soundex computes the Soundex phonetic representation of the input string. It -// attempts to encode homophones with the same characters. More information can -// be found at http://en.wikipedia.org/wiki/Soundex. 
-func Soundex(s1 string) string { - if len(s1) == 0 { - return "" - } - - // we should work with all uppercase - s1 = strings.ToUpper(s1) - - input := NewString(s1) - - // the encoded value - enc := input.Slice(0, 1) - - c := "" - prev := "" - hw := false - - for i := 0; i < input.RuneCount(); i++ { - switch rune(input.At(i)) { - case 'B', 'F', 'P', 'V': - c = "1" - case 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z': - c = "2" - case 'D', 'T': - c = "3" - case 'L': - c = "4" - case 'M', 'N': - c = "5" - case 'R': - c = "6" - case 'H', 'W': - hw = true - default: - c = "" - } - - // don't encode the first position, but we need its code value - // to prevent repeats - if c != "" && c != prev && i > 0 { - // if the next encoded digit is different, we can add it right away - // if it is the same, though, it must not have been preceded - // by an 'H' or a 'W' - if enc[len(enc)-1:len(enc)] != c || !hw { - enc = enc + c - } - - // we're done when we reach four encoded characters - if len(enc) == 4 { - break - } - } - - prev = c - hw = false - } - - // if we've fallen short of 4 "real" encoded characters, - // it gets padded with zeros - for len(enc) < 4 { - enc = enc + "0" - } - - return enc -} diff --git a/vendor/github.com/antzucaro/matchr/utf8.go b/vendor/github.com/antzucaro/matchr/utf8.go deleted file mode 100644 index d8c40950..00000000 --- a/vendor/github.com/antzucaro/matchr/utf8.go +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package matchr - -import ( - "errors" - "unicode/utf8" -) - -// String wraps a regular string with a small structure that provides more -// efficient indexing by code point index, as opposed to byte index. -// Scanning incrementally forwards or backwards is O(1) per index operation -// (although not as fast a range clause going forwards). 
Random access is -// O(N) in the length of the string, but the overhead is less than always -// scanning from the beginning. -// If the string is ASCII, random access is O(1). -// Unlike the built-in string type, String has internal mutable state and -// is not thread-safe. -type String struct { - str string - numRunes int - // If width > 0, the rune at runePos starts at bytePos and has the specified width. - width int - bytePos int - runePos int - nonASCII int // byte index of the first non-ASCII rune. -} - -// NewString returns a new UTF-8 string with the provided contents. -func NewString(contents string) *String { - return new(String).Init(contents) -} - -// Init initializes an existing String to hold the provided contents. -// It returns a pointer to the initialized String. -func (s *String) Init(contents string) *String { - s.str = contents - s.bytePos = 0 - s.runePos = 0 - for i := 0; i < len(contents); i++ { - if contents[i] >= utf8.RuneSelf { - // Not ASCII. - s.numRunes = utf8.RuneCountInString(contents) - _, s.width = utf8.DecodeRuneInString(contents) - s.nonASCII = i - return s - } - } - // ASCII is simple. Also, the empty string is ASCII. - s.numRunes = len(contents) - s.width = 0 - s.nonASCII = len(contents) - return s -} - -// String returns the contents of the String. This method also means the -// String is directly printable by fmt.Print. -func (s *String) String() string { - return s.str -} - -// RuneCount returns the number of runes (Unicode code points) in the String. -func (s *String) RuneCount() int { - return s.numRunes -} - -// IsASCII returns a boolean indicating whether the String contains only ASCII bytes. -func (s *String) IsASCII() bool { - return s.width == 0 -} - -// Slice returns the string sliced at rune positions [i:j]. -func (s *String) Slice(i, j int) string { - // ASCII is easy. Let the compiler catch the indexing error if there is one. 
- if j < s.nonASCII { - return s.str[i:j] - } - if i < 0 || j > s.numRunes || i > j { - panic(errors.New("utf8.String: slice index out of range")) - } - if i == j { - return "" - } - // For non-ASCII, after At(i), bytePos is always the position of the indexed character. - var low, high int - switch { - case i < s.nonASCII: - low = i - case i == s.numRunes: - low = len(s.str) - default: - s.At(i) - low = s.bytePos - } - switch { - case j == s.numRunes: - high = len(s.str) - default: - s.At(j) - high = s.bytePos - } - return s.str[low:high] -} - -// At returns the rune with index i in the String. The sequence of runes is the same -// as iterating over the contents with a "for range" clause. -func (s *String) At(i int) int { - // ASCII is easy. Let the compiler catch the indexing error if there is one. - if i < s.nonASCII { - return int(s.str[i]) - } - - // Now we do need to know the index is valid. - if i < 0 || i >= s.numRunes { - panic(errors.New("utf8.String: index out of range")) - } - - var r rune - - // Five easy common cases: within 1 spot of bytePos/runePos, or the beginning, or the end. - // With these cases, all scans from beginning or end work in O(1) time per rune. - switch { - - case i == s.runePos-1: // backing up one rune - r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) - s.runePos = i - s.bytePos -= s.width - return int(r) - case i == s.runePos+1: // moving ahead one rune - s.runePos = i - s.bytePos += s.width - fallthrough - case i == s.runePos: - r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) - return int(r) - case i == 0: // start of string - r, s.width = utf8.DecodeRuneInString(s.str) - s.runePos = 0 - s.bytePos = 0 - return int(r) - - case i == s.numRunes-1: // last rune in string - r, s.width = utf8.DecodeLastRuneInString(s.str) - s.runePos = i - s.bytePos = len(s.str) - s.width - return int(r) - } - - // We need to do a linear scan. There are three places to start from: - // 1) The beginning - // 2) bytePos/runePos. 
- // 3) The end - // Choose the closest in rune count, scanning backwards if necessary. - forward := true - if i < s.runePos { - // Between beginning and pos. Which is closer? - // Since both i and runePos are guaranteed >= nonASCII, that's the - // lowest location we need to start from. - if i < (s.runePos-s.nonASCII)/2 { - // Scan forward from beginning - s.bytePos, s.runePos = s.nonASCII, s.nonASCII - } else { - // Scan backwards from where we are - forward = false - } - } else { - // Between pos and end. Which is closer? - if i-s.runePos < (s.numRunes-s.runePos)/2 { - // Scan forward from pos - } else { - // Scan backwards from end - s.bytePos, s.runePos = len(s.str), s.numRunes - forward = false - } - } - if forward { - // TODO: Is it much faster to use a range loop for this scan? - for { - r, s.width = utf8.DecodeRuneInString(s.str[s.bytePos:]) - if s.runePos == i { - break - } - s.runePos++ - s.bytePos += s.width - } - } else { - for { - r, s.width = utf8.DecodeLastRuneInString(s.str[0:s.bytePos]) - s.runePos-- - s.bytePos -= s.width - if s.runePos == i { - break - } - } - } - return int(r) -} - -// We want the panic in At(i) to satisfy os.Error, because that's what -// runtime panics satisfy, but we can't import os. This is our solution. - -// error is the type of the error returned if a user calls String.At(i) with i out of range. -// It satisfies os.Error and runtime.Error. 
-// type error string - -/* -func (err error) String() string { - return string(err) -} - -func (err error) RunTimeError() { -} -*/ diff --git a/vendor/github.com/antzucaro/matchr/util.go b/vendor/github.com/antzucaro/matchr/util.go deleted file mode 100644 index 520e587b..00000000 --- a/vendor/github.com/antzucaro/matchr/util.go +++ /dev/null @@ -1,119 +0,0 @@ -package matchr - -import ( - "math" - "strings" -) - -// min of two integers -func min(a int, b int) (res int) { - if a < b { - res = a - } else { - res = b - } - - return -} - -// max of two integers -func maxI(a int, b int) (res int) { - if a < b { - res = b - } else { - res = a - } - - return -} - -// max of two float64s -func max(a float64, b float64) (res float64) { - if a < b { - res = b - } else { - res = a - } - - return -} - -// is this string index outside of the ASCII numeric code points? -func nan(c rune) bool { - return ((c > 57) || (c < 48)) -} - -// Round a float64 to the given precision -// -// http://play.golang.org/p/S654PxAe_N -// -// (via Rory McGuire at -// https://groups.google.com/forum/#!topic/golang-nuts/ITZV08gAugI) -func round(x float64, prec int) float64 { - if math.IsNaN(x) || math.IsInf(x, 0) { - return x - } - - sign := 1.0 - if x < 0 { - sign = -1 - x *= -1 - } - - var rounder float64 - pow := math.Pow(10, float64(prec)) - intermed := x * pow - _, frac := math.Modf(intermed) - - if frac >= 0.5 { - rounder = math.Ceil(intermed) - } else { - rounder = math.Floor(intermed) - } - - return rounder / pow * sign -} - -// A helper to determine if any substrings exist within the given string -func contains(value *String, start int, length int, criteria ...string) bool { - substring := substring(value, start, length) - for _, c := range criteria { - if substring == c { - return true - } - } - return false -} - -// A fault-tolerant version of Slice. It will return nothing ("") if the index -// is out of bounds. This allows substring-ing without having to bound check -// every time. 
-func substring(value *String, start int, length int) string { - if start >= 0 && start+length <= value.RuneCount() { - return value.Slice(start, start+length) - } else { - return "" - } -} - -func isVowel(c rune) bool { - switch c { - case 'A', 'E', 'I', 'O', 'U', 'Y': - return true - default: - return false - } -} - -func isVowelNoY(c rune) bool { - switch c { - case 'A', 'E', 'I', 'O', 'U': - return true - default: - return false - } -} - -func cleanInput(input string) string { - return strings.ToUpper(strings.TrimSpace(input)) -} diff --git a/vendor/github.com/urfave/cli/v2/suggestions.go b/vendor/github.com/urfave/cli/v2/suggestions.go index 65bb3cf2..87fa905d 100644 --- a/vendor/github.com/urfave/cli/v2/suggestions.go +++ b/vendor/github.com/urfave/cli/v2/suggestions.go @@ -3,9 +3,18 @@ package cli import ( "fmt" - "github.com/antzucaro/matchr" + "github.com/xrash/smetrics" ) +func jaroWinkler(a, b string) float64 { + // magic values are from https://github.com/xrash/smetrics/blob/039620a656736e6ad994090895784a7af15e0b80/jaro-winkler.go#L8 + const ( + boostThreshold = 0.7 + prefixSize = 4 + ) + return smetrics.JaroWinkler(a, b, boostThreshold, prefixSize) +} + func suggestFlag(flags []Flag, provided string, hideHelp bool) string { distance := 0.0 suggestion := "" @@ -16,7 +25,7 @@ func suggestFlag(flags []Flag, provided string, hideHelp bool) string { flagNames = append(flagNames, HelpFlag.Names()...) 
} for _, name := range flagNames { - newDistance := matchr.JaroWinkler(name, provided, true) + newDistance := jaroWinkler(name, provided) if newDistance > distance { distance = newDistance suggestion = name @@ -39,7 +48,7 @@ func suggestCommand(commands []*Command, provided string) (suggestion string) { distance := 0.0 for _, command := range commands { for _, name := range append(command.Names(), helpName, helpAlias) { - newDistance := matchr.JaroWinkler(name, provided, true) + newDistance := jaroWinkler(name, provided) if newDistance > distance { distance = newDistance suggestion = name diff --git a/vendor/github.com/xrash/smetrics/.travis.yml b/vendor/github.com/xrash/smetrics/.travis.yml new file mode 100644 index 00000000..d1cd67ff --- /dev/null +++ b/vendor/github.com/xrash/smetrics/.travis.yml @@ -0,0 +1,9 @@ +language: go +go: + - 1.11 + - 1.12 + - 1.13 + - 1.14.x + - master +script: + - cd tests && make diff --git a/vendor/github.com/xrash/smetrics/LICENSE b/vendor/github.com/xrash/smetrics/LICENSE new file mode 100644 index 00000000..80445682 --- /dev/null +++ b/vendor/github.com/xrash/smetrics/LICENSE @@ -0,0 +1,21 @@ +Copyright (C) 2016 Felipe da Cunha Gonçalves +All Rights Reserved. + +MIT LICENSE + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/xrash/smetrics/README.md b/vendor/github.com/xrash/smetrics/README.md new file mode 100644 index 00000000..5e0c1a46 --- /dev/null +++ b/vendor/github.com/xrash/smetrics/README.md @@ -0,0 +1,49 @@ +[![Build Status](https://travis-ci.org/xrash/smetrics.svg?branch=master)](http://travis-ci.org/xrash/smetrics) + +# smetrics + +`smetrics` is "string metrics". + +Package smetrics provides a bunch of algorithms for calculating the distance between strings. + +There are implementations for calculating the popular Levenshtein distance (aka Edit Distance or Wagner-Fischer), as well as the Jaro distance, the Jaro-Winkler distance, and more. + +# How to import + +```go +import "github.com/xrash/smetrics" +``` + +# Documentation + +Go to [https://pkg.go.dev/github.com/xrash/smetrics](https://pkg.go.dev/github.com/xrash/smetrics) for complete documentation. 
+ +# Example + +```go +package main + +import ( + "github.com/xrash/smetrics" +) + +func main() { + smetrics.WagnerFischer("POTATO", "POTATTO", 1, 1, 2) + smetrics.WagnerFischer("MOUSE", "HOUSE", 2, 2, 4) + + smetrics.Ukkonen("POTATO", "POTATTO", 1, 1, 2) + smetrics.Ukkonen("MOUSE", "HOUSE", 2, 2, 4) + + smetrics.Jaro("AL", "AL") + smetrics.Jaro("MARTHA", "MARHTA") + + smetrics.JaroWinkler("AL", "AL", 0.7, 4) + smetrics.JaroWinkler("MARTHA", "MARHTA", 0.7, 4) + + smetrics.Soundex("Euler") + smetrics.Soundex("Ellery") + + smetrics.Hamming("aaa", "aaa") + smetrics.Hamming("aaa", "aab") +} +``` diff --git a/vendor/github.com/xrash/smetrics/doc.go b/vendor/github.com/xrash/smetrics/doc.go new file mode 100644 index 00000000..21bc986c --- /dev/null +++ b/vendor/github.com/xrash/smetrics/doc.go @@ -0,0 +1,19 @@ +/* +Package smetrics provides a bunch of algorithms for calculating +the distance between strings. + +There are implementations for calculating the popular Levenshtein +distance (aka Edit Distance or Wagner-Fischer), as well as the Jaro +distance, the Jaro-Winkler distance, and more. + +For the Levenshtein distance, you can use the functions WagnerFischer() +and Ukkonen(). Read the documentation on these functions. + +For the Jaro and Jaro-Winkler algorithms, check the functions +Jaro() and JaroWinkler(). Read the documentation on these functions. + +For the Soundex algorithm, check the function Soundex(). + +For the Hamming distance algorithm, check the function Hamming(). +*/ +package smetrics diff --git a/vendor/github.com/xrash/smetrics/hamming.go b/vendor/github.com/xrash/smetrics/hamming.go new file mode 100644 index 00000000..505d3e5d --- /dev/null +++ b/vendor/github.com/xrash/smetrics/hamming.go @@ -0,0 +1,25 @@ +package smetrics + +import ( + "fmt" +) + +// The Hamming distance is the minimum number of substitutions required to change string A into string B. Both strings must have the same size. 
If the strings have different sizes, the function returns an error. +func Hamming(a, b string) (int, error) { + al := len(a) + bl := len(b) + + if al != bl { + return -1, fmt.Errorf("strings are not equal (len(a)=%d, len(b)=%d)", al, bl) + } + + var difference = 0 + + for i := range a { + if a[i] != b[i] { + difference = difference + 1 + } + } + + return difference, nil +} diff --git a/vendor/github.com/xrash/smetrics/jaro-winkler.go b/vendor/github.com/xrash/smetrics/jaro-winkler.go new file mode 100644 index 00000000..abdb2888 --- /dev/null +++ b/vendor/github.com/xrash/smetrics/jaro-winkler.go @@ -0,0 +1,28 @@ +package smetrics + +import ( + "math" +) + +// The Jaro-Winkler distance. The result is 1 for equal strings, and 0 for completely different strings. It is commonly used on Record Linkage stuff, thus it tries to be accurate for common typos when writing real names such as person names and street names. +// Jaro-Winkler is a modification of the Jaro algorithm. It works by first running Jaro, then boosting the score of exact matches at the beginning of the strings. Because of that, it introduces two more parameters: the boostThreshold and the prefixSize. These are commonly set to 0.7 and 4, respectively. +func JaroWinkler(a, b string, boostThreshold float64, prefixSize int) float64 { + j := Jaro(a, b) + + if j <= boostThreshold { + return j + } + + prefixSize = int(math.Min(float64(len(a)), math.Min(float64(prefixSize), float64(len(b))))) + + var prefixMatch float64 + for i := 0; i < prefixSize; i++ { + if a[i] == b[i] { + prefixMatch++ + } else { + break + } + } + + return j + 0.1*prefixMatch*(1.0-j) +} diff --git a/vendor/github.com/xrash/smetrics/jaro.go b/vendor/github.com/xrash/smetrics/jaro.go new file mode 100644 index 00000000..75f924e1 --- /dev/null +++ b/vendor/github.com/xrash/smetrics/jaro.go @@ -0,0 +1,86 @@ +package smetrics + +import ( + "math" +) + +// The Jaro distance. 
The result is 1 for equal strings, and 0 for completely different strings. +func Jaro(a, b string) float64 { + // If both strings are zero-length, they are completely equal, + // therefore return 1. + if len(a) == 0 && len(b) == 0 { + return 1 + } + + // If one string is zero-length, strings are completely different, + // therefore return 0. + if len(a) == 0 || len(b) == 0 { + return 0 + } + + // Define the necessary variables for the algorithm. + la := float64(len(a)) + lb := float64(len(b)) + matchRange := int(math.Max(0, math.Floor(math.Max(la, lb)/2.0)-1)) + matchesA := make([]bool, len(a)) + matchesB := make([]bool, len(b)) + var matches float64 = 0 + + // Step 1: Matches + // Loop through each character of the first string, + // looking for a matching character in the second string. + for i := 0; i < len(a); i++ { + start := int(math.Max(0, float64(i-matchRange))) + end := int(math.Min(lb-1, float64(i+matchRange))) + + for j := start; j <= end; j++ { + if matchesB[j] { + continue + } + + if a[i] == b[j] { + matchesA[i] = true + matchesB[j] = true + matches++ + break + } + } + } + + // If there are no matches, strings are completely different, + // therefore return 0. + if matches == 0 { + return 0 + } + + // Step 2: Transpositions + // Loop through the matches' arrays, looking for + // unaligned matches. Count the number of unaligned matches. + unaligned := 0 + j := 0 + for i := 0; i < len(a); i++ { + if !matchesA[i] { + continue + } + + for !matchesB[j] { + j++ + } + + if a[i] != b[j] { + unaligned++ + } + + j++ + } + + // The number of unaligned matches divided by two, is the number of _transpositions_. + transpositions := math.Floor(float64(unaligned / 2)) + + // Jaro distance is the average between these three numbers: + // 1. matches / length of string A + // 2. matches / length of string B + // 3. (matches - transpositions/matches) + // So, all that divided by three is the final result. 
+ return ((matches / la) + (matches / lb) + ((matches - transpositions) / matches)) / 3.0 +} diff --git a/vendor/github.com/xrash/smetrics/soundex.go b/vendor/github.com/xrash/smetrics/soundex.go new file mode 100644 index 00000000..a2ad034d --- /dev/null +++ b/vendor/github.com/xrash/smetrics/soundex.go @@ -0,0 +1,41 @@ +package smetrics + +import ( + "strings" +) + +// The Soundex encoding. It is a phonetic algorithm that considers how the words sound in English. Soundex maps a string to a 4-byte code consisting of the first letter of the original string and three numbers. Strings that sound similar should map to the same code. +func Soundex(s string) string { + m := map[byte]string{ + 'B': "1", 'P': "1", 'F': "1", 'V': "1", + 'C': "2", 'S': "2", 'K': "2", 'G': "2", 'J': "2", 'Q': "2", 'X': "2", 'Z': "2", + 'D': "3", 'T': "3", + 'L': "4", + 'M': "5", 'N': "5", + 'R': "6", + } + + s = strings.ToUpper(s) + + r := string(s[0]) + p := s[0] + for i := 1; i < len(s) && len(r) < 4; i++ { + c := s[i] + + if (c < 'A' || c > 'Z') || (c == p) { + continue + } + + p = c + + if n, ok := m[c]; ok { + r += n + } + } + + for i := len(r); i < 4; i++ { + r += "0" + } + + return r +} diff --git a/vendor/github.com/xrash/smetrics/ukkonen.go b/vendor/github.com/xrash/smetrics/ukkonen.go new file mode 100644 index 00000000..3c5579cd --- /dev/null +++ b/vendor/github.com/xrash/smetrics/ukkonen.go @@ -0,0 +1,94 @@ +package smetrics + +import ( + "math" +) + +// The Ukkonen algorithm for calculating the Levenshtein distance. The algorithm is described in http://www.cs.helsinki.fi/u/ukkonen/InfCont85.PDF, or in docs/InfCont85.PDF. It runs on O(t . min(m, n)) where t is the actual distance between strings a and b. It needs O(min(t, m, n)) space. This function might be preferred over WagnerFischer() for *very* similar strings. But test it out yourself. +// The first two parameters are the two strings to be compared. 
// The last three parameters are the insertion cost, the deletion cost and the substitution cost. These are normally defined as 1, 1 and 2 respectively.
//
// Ukkonen evaluates only a band of diagonals of the edit matrix. The band
// width is derived from a cost threshold t; when the distance found inside
// the band exceeds t, t is doubled and the band is recomputed. Strings are
// compared byte by byte.
//
// The band computation divides by the smallest of the three costs, so it only
// runs when that cost is positive. When the smallest cost is zero or negative
// the function evaluates the full matrix instead; this avoids the
// divide-by-zero panic and the endless threshold doubling that such costs
// would otherwise cause.
func Ukkonen(a, b string, icost, dcost, scost int) int {
	// Smallest of the three costs; it is the unit that turns the threshold
	// into a band width.
	lowerCost := icost
	if dcost < lowerCost {
		lowerCost = dcost
	}
	if scost < lowerCost {
		lowerCost = scost
	}

	if lowerCost <= 0 {
		return ukkonenFullMatrix(a, b, icost, dcost, scost)
	}

	const infinite = math.MaxInt32 / 2

	// diff is the offset of the diagonal on which the final cell lies.
	diff := len(b) - len(a)
	absDiff := diff
	if absDiff < 0 {
		absDiff = -absDiff
	}

	// Start with the smallest threshold that can hold the length difference.
	// t/lowerCost is therefore always greater than diff, so no lower-bound
	// check on t is needed inside the loop.
	t := (absDiff + 1) * lowerCost

	for {
		// Number of extra diagonals evaluated on each side of the band.
		p := (t/lowerCost - diff) / 2
		rowlength := diff + 2*p

		// One slot per diagonal, plus a sentinel past the end that stays
		// infinite so r[j+1] is always readable.
		r := make([]int, rowlength+2)
		for i := range r {
			r[i] = infinite
		}

		// Slot j of row i holds the cell for column j+k of b; k advances by
		// one per row so a slot keeps following its diagonal.
		k := -p
		for i := 0; i <= len(a); i++ {
			for j := 0; j <= rowlength; j++ {
				if i == 0 && j+k == 0 {
					r[j] = 0
					continue
				}

				// Insertion comes from the left neighbour in this row.
				ins := infinite
				if j > 0 {
					ins = r[j-1] + icost
				}

				// Deletion comes from the same column in the previous row,
				// which sits one slot to the right.
				del := r[j+1] + dcost

				// Substitution (or a free match) comes from the same slot in
				// the previous row, when that cell exists.
				var sub int
				col := j + k - 1
				switch {
				case i == 0 || col < 0 || col >= len(b):
					sub = infinite
				case a[i-1] == b[col]:
					sub = r[j]
				default:
					sub = r[j] + scost
				}

				switch {
				case ins < del && ins < sub:
					r[j] = ins
				case del < sub:
					r[j] = del
				default:
					r[j] = sub
				}
			}
			k++
		}

		// The final cell lies on diagonal diff, which is slot diff+p.
		if d := r[diff+p]; d <= t {
			return d
		}
		t *= 2
	}
}

// ukkonenFullMatrix computes the weighted edit distance over the whole matrix
// using two rolling rows. Ukkonen uses it when the smallest cost is not
// positive and the band computation cannot run.
func ukkonenFullMatrix(a, b string, icost, dcost, scost int) int {
	prev := make([]int, len(b)+1)
	cur := make([]int, len(b)+1)

	for j := 1; j <= len(b); j++ {
		prev[j] = j * icost
	}

	for i := 1; i <= len(a); i++ {
		cur[0] = i * dcost

		for j := 1; j <= len(b); j++ {
			if a[i-1] == b[j-1] {
				cur[j] = prev[j-1]
				continue
			}

			best := prev[j-1] + scost
			if del := prev[j] + dcost; del < best {
				best = del
			}
			if ins := cur[j-1] + icost; ins < best {
				best = ins
			}
			cur[j] = best
		}

		prev, cur = cur, prev
	}

	return prev[len(b)]
}
// WagnerFischer computes the weighted Levenshtein distance between a and b
// with the Wagner-Fischer dynamic programme, comparing the strings byte by
// byte.
//
// icost, dcost and scost are the insertion, deletion and substitution costs;
// they are normally 1, 1 and 2 respectively. Only two rows of the matrix are
// kept in memory at any time.
func WagnerFischer(a, b string, icost, dcost, scost int) int {
	width := len(b) + 1

	// prev is the finished row above, cur is the row being filled.
	prev := make([]int, width)
	cur := make([]int, width)

	// Row zero: reaching column c from the empty prefix takes c insertions.
	for col := 1; col < width; col++ {
		prev[col] = col * icost
	}

	for row := 1; row <= len(a); row++ {
		// Column zero: reaching row r against the empty prefix takes r deletions.
		cur[0] = row * dcost

		for col := 1; col < width; col++ {
			// Equal bytes carry the diagonal value over at no cost.
			if a[row-1] == b[col-1] {
				cur[col] = prev[col-1]
				continue
			}

			ins := cur[col-1] + icost
			del := prev[col] + dcost
			sub := prev[col-1] + scost

			switch {
			case ins < del && ins < sub:
				cur[col] = ins
			case del < sub:
				cur[col] = del
			default:
				cur[col] = sub
			}
		}

		// The row just filled becomes the row above for the next pass.
		prev, cur = cur, prev
	}

	// After the final swap the last completed row is prev.
	return prev[width-1]
}