Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changelog optimization #4242

Merged
merged 2 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 48 additions & 25 deletions pkg/changelog/changelog.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package changelog

import (
"slices"
"time"

"github.com/aquasecurity/tracee/pkg/logger"
Expand Down Expand Up @@ -85,9 +84,9 @@ func (clv *Changelog[T]) Get(targetTime time.Time) T {

// GetAll returns all the values of the changelog.
func (clv *Changelog[T]) GetAll() []T {
values := make([]T, len(clv.changes))
for i := range clv.changes {
values[i] = clv.changes[i].value
values := make([]T, 0, len(clv.changes))
for _, change := range clv.changes {
values = append(values, change.value)
}
return values
}
Expand Down Expand Up @@ -165,33 +164,57 @@ func (clv *Changelog[T]) findIndex(target time.Time) int {

// enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit.
// It applies two methods to reduce the log size to the maximum allowed:
// 1. Unite duplicate values that are trailing one another.
// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair.
// 2. Remove the oldest logs as they are likely less important.

func (clv *Changelog[T]) enforceSizeBoundary() {
if len(clv.changes) > clv.maxSize {
// Get rid of oldest changes to keep max size boundary
boundaryDiff := len(clv.changes) - clv.maxSize

// First try to unite states
clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool {
if i.value == i2.value && boundaryDiff > 0 {
delete(clv.timestamps, i2.timestamp)
boundaryDiff--
return true
}
return false
})

if boundaryDiff == 0 {
return
if len(clv.changes) <= clv.maxSize {
// Nothing to do
return
}

boundaryDiff := len(clv.changes) - clv.maxSize
changed := false

// Compact the slice in place
writeIdx := 0
for readIdx := 0; readIdx < len(clv.changes); readIdx++ {
nextIdx := readIdx + 1
if nextIdx < len(clv.changes) &&
clv.changes[nextIdx].value == clv.changes[readIdx].value &&
boundaryDiff > 0 {
// Remove the oldest (readIdx) from the duplicate pair
delete(clv.timestamps, clv.changes[readIdx].timestamp)
boundaryDiff--
changed = true
continue
}
removedChanges := clv.changes[:boundaryDiff]
clv.changes = clv.changes[boundaryDiff:]
for _, removedChange := range removedChanges {
delete(clv.timestamps, removedChange.timestamp)

// If elements have been removed or moved, update the map and the slice
if changed {
clv.changes[writeIdx] = clv.changes[readIdx]
}

writeIdx++
}

if changed {
clear(clv.changes[writeIdx:])
clv.changes = clv.changes[:writeIdx]
}

if len(clv.changes) <= clv.maxSize {
// Size is within limits after compaction
return
}

// As it still exceeds maxSize, remove the oldest entries in the remaining slice
boundaryDiff = len(clv.changes) - clv.maxSize
for i := 0; i < boundaryDiff; i++ {
delete(clv.timestamps, clv.changes[i].timestamp)
}
clear(clv.changes[:boundaryDiff])
clv.changes = clv.changes[boundaryDiff:]
}

// returnZero returns the zero value of the type T.
Expand Down
233 changes: 233 additions & 0 deletions pkg/changelog/changelog_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
package changelog

import (
"testing"
"time"
)

func Benchmark_enforceSizeBoundary(b *testing.B) {
testCases := []struct {
name string
changelog Changelog[int]
}{
{
name: "No change needed",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
},
maxSize: 5,
},
},
{
name: "Trim excess with duplicates",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 1, timestamp: getTimeFromSec(2)},
{value: 2, timestamp: getTimeFromSec(3)},
{value: 3, timestamp: getTimeFromSec(4)},
{value: 3, timestamp: getTimeFromSec(5)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
getTimeFromSec(3): {},
getTimeFromSec(4): {},
getTimeFromSec(5): {},
},
maxSize: 3,
},
},
{
name: "Remove oldest entries",
changelog: Changelog[int]{
changes: []item[int]{
{value: 1, timestamp: getTimeFromSec(1)},
{value: 2, timestamp: getTimeFromSec(2)},
{value: 3, timestamp: getTimeFromSec(3)},
{value: 4, timestamp: getTimeFromSec(4)},
},
timestamps: map[time.Time]struct{}{
getTimeFromSec(1): {},
getTimeFromSec(2): {},
getTimeFromSec(3): {},
getTimeFromSec(4): {},
},
maxSize: 2,
},
},
}

for _, tc := range testCases {
b.Run(tc.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
clv := tc.changelog // Create a copy for each iteration
clv.enforceSizeBoundary()
}
})
}
}

func Benchmark_setAt(b *testing.B) {
// Test cases where the Changelog needs to enforce the size boundary
testCasesAllScenarios := []struct {
value int
time time.Time
}{
{
value: 42,
time: getTimeFromSec(0),
},
{
value: 72,
time: getTimeFromSec(1),
},
{
value: 642,
time: getTimeFromSec(2),
},
{
value: 672,
time: getTimeFromSec(3), // will trigger removal of oldest entry
},
{
value: 642,
time: getTimeFromSec(4), // will trigger coalescing of duplicate values
},
{
value: 672,
time: getTimeFromSec(5), // will trigger coalescing of duplicate values
},
{
value: 6642,
time: getTimeFromSec(6), // will trigger removal of oldest entry
},
{
value: 672,
time: getTimeFromSec(7), // will trigger coalescing of duplicate values
},
{
value: 642,
time: getTimeFromSec(8), // will trigger coalescing of duplicate values
},
{
value: 6672,
time: getTimeFromSec(9), // will trigger removal of oldest entry
},
{
value: 9642,
time: getTimeFromSec(10), // will trigger removal of oldest entry
},
{
value: 0,
time: getTimeFromSec(0), // will just update the value
},
{
value: 0,
time: getTimeFromSec(1), // will just update the value
},
{
value: 0,
time: getTimeFromSec(2), // will just update the value
},
{
value: 0,
time: getTimeFromSec(3), // will just update the value
},
}

b.Run("All Scenarios", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
clv := NewChangelog[int](3)
b.StartTimer()
for _, tc := range testCasesAllScenarios {
clv.setAt(tc.value, tc.time)
}
}
})

// Test cases where the changelog is within the maximum size limit
testCasesWithinLimit := []struct {
value int
time time.Time
}{
{
value: 0,
time: getTimeFromSec(0),
},
{
value: 1,
time: getTimeFromSec(1),
},
{
value: 2,
time: getTimeFromSec(2),
},
{
value: 3,
time: getTimeFromSec(3),
},
{
value: 4,
time: getTimeFromSec(4),
},
{
value: 5,
time: getTimeFromSec(5),
},
{
value: 6,
time: getTimeFromSec(6),
},
{
value: 7,
time: getTimeFromSec(7),
},
{
value: 8,
time: getTimeFromSec(8),
},
{
value: 9,
time: getTimeFromSec(9),
},
{
value: 10,
time: getTimeFromSec(10),
},
{
value: 11,
time: getTimeFromSec(11),
},
{
value: 12,
time: getTimeFromSec(12),
},
{
value: 13,
time: getTimeFromSec(13),
},
{
value: 14,
time: getTimeFromSec(14),
},
}

b.Run("Within Limit", func(b *testing.B) {
for i := 0; i < b.N; i++ {
b.StopTimer()
clv := NewChangelog[int](15)
b.StartTimer()
for _, tc := range testCasesWithinLimit {
clv.setAt(tc.value, tc.time)
}
}
})
}
Loading
Loading