Skip to content

Commit

Permalink
feat: improve LongestRepeatingSequence performance by reducing allocations (#575)
Browse files Browse the repository at this point in the history

Keeps only two rows of the dynamic-programming table (previous and current) instead of allocating the full (n+1)×(n+1) matrix, plus other miscellaneous changes.

Benchmarks:

$ go test -bench=. -benchmem -memprofile=allocs
goos: darwin
goarch: amd64
pkg: github.com/projectdiscovery/utils/strings
cpu: VirtualApple @ 2.50GHz
BenchmarkLongestRepeatingSequence-10              171566              6612 ns/op           15584 B/op         61 allocs/op
BenchmarkLongestRepeatingSequenceNew-10              360710              3129 ns/op             704 B/op          2 allocs/op +++
PASS
ok      github.com/projectdiscovery/utils/strings       1.851s
  • Loading branch information
Ice3man543 authored Dec 1, 2024
1 parent 3cb03c0 commit cebafa1
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 27 deletions.
51 changes: 29 additions & 22 deletions strings/stringsutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,41 +231,48 @@ type LongestSequence struct {

// LongestRepeatingSequence finds the longest repeating non-overlapping sequence in a string
func LongestRepeatingSequence(s string) LongestSequence {
res := ""
resLength := 0
n := len(s)
lcsre := make([][]int, n+1)

for i := range lcsre {
lcsre[i] = make([]int, n+1)
if n == 0 {
return LongestSequence{}
}

idx := 0
// Use two rolling rows (prev/curr) instead of the full matrix
prev := make([]int, n+1)
curr := make([]int, n+1)

maxLen := 0
endPos := 0

for i := 1; i <= n; i++ {
for j := i + 1; j <= n; j++ {
if s[i-1] == s[j-1] && lcsre[i-1][j-1] < (j-i) {
lcsre[i][j] = lcsre[i-1][j-1] + 1
if lcsre[i][j] > resLength {
resLength = lcsre[i][j]
if i > idx {
idx = i
}
if s[i-1] == s[j-1] && prev[j-1] < (j-i) {
curr[j] = prev[j-1] + 1
if curr[j] > maxLen {
maxLen = curr[j]
endPos = i
}
} else {
lcsre[i][j] = 0
curr[j] = 0
}
}
prev, curr = curr, prev
for j := range curr {
curr[j] = 0
}
}
if resLength > 0 {
for i := idx - resLength + 1; i <= idx; i++ {
res += string(s[i-1])

var sequence string
if maxLen > 0 {
start := endPos - maxLen
if start >= 0 {
sequence = s[start:endPos]
}
}
resCount := 0
if res != "" {
resCount = strings.Count(s, res)

return LongestSequence{
Sequence: sequence,
Count: strings.Count(s, sequence),
}
return LongestSequence{Sequence: res, Count: resCount}
}

// IsPrintable checks if the string is made only of printable characters
Expand Down
28 changes: 23 additions & 5 deletions strings/stringsutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,20 @@ func TestLongestRepeatingSequence(t *testing.T) {
s string
expected string
}{
{"abcdefg", ""},
{"abcabcabc", "abc"},
{"abcdefabcdef", "abcdef"},
{"abcdefgabcdefg", "abcdefg"},
{"abcabcdefdef", "abc"},
{s: "abcabca", expected: "abc"},
{s: "abcdefg", expected: ""},
{s: "abcabcabc", expected: "abc"},
{s: "abcdefabcdef", expected: "abcdef"},
{s: "abcdefgabcdefg", expected: "abcdefg"},
{s: "abcabcdefdef", expected: "abc"},

// edge cases
{s: "aaa", expected: "a"},
{s: "aaaa", expected: "aa"},
{s: "abababab", expected: "abab"},
{s: "test test test", expected: "test "},
{s: "AbcAbcAbc", expected: "Abc"},
{s: "!@#$!@#$", expected: "!@#$"},
}

for _, test := range tests {
Expand Down Expand Up @@ -467,3 +476,12 @@ func TestNormalizeWithOptions(t *testing.T) {
require.Equal(t, test.result, res)
}
}

// BenchmarkLongestRepeatingSequence runs LongestRepeatingSequence b.N times
// on a fixed input and reports timing together with allocation statistics.
func BenchmarkLongestRepeatingSequence(b *testing.B) {
	const input = "AAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBB" // 40 chars test string

	// Enable allocation reporting, then discard any setup cost before the
	// measured loop starts.
	b.ReportAllocs()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		_ = LongestRepeatingSequence(input)
	}
}

0 comments on commit cebafa1

Please sign in to comment.