Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding LP batching #112

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 78 additions & 17 deletions influxdb3/batching/batcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,43 +24,72 @@
package batching

import (
"fmt"
"log/slog"
"sync"

"github.com/InfluxCommunity/influxdb3-go/influxdb3"
)

// Option to adapt properties of a batcher
type Option func(*Batcher)
type Option func(*interface{})

// WithSize changes the batch-size emitted by the batcher
// With the standard Batcher the implied unit is a Point
// With the LPBatcher the implied unit is a byte
func WithSize(size int) Option {
return func(b *Batcher) {
b.size = size
return func(b *interface{}) {
if bb, bok := (*b).(*Batcher); bok {
bb.size = size
} else if lb, lok := (*b).(*LPBatcher); lok {
lb.size = size
} else {
slog.Warn("Failed to match Batcher type in WithSize. Value not set.")
}

Check warning on line 48 in influxdb3/batching/batcher.go

View check run for this annotation

Codecov / codecov/patch

influxdb3/batching/batcher.go#L47-L48

Added lines #L47 - L48 were not covered by tests
}
}

// WithCapacity changes the initial capacity of the points buffer
// WithCapacity changes the initial capacity of the internal buffer
// With the standard Batcher the implied unit is a Point
// With the LPBatcher the implied unit is a byte
func WithCapacity(capacity int) Option {
return func(b *Batcher) {
b.capacity = capacity
return func(b *interface{}) {
if bb, bok := (*b).(*Batcher); bok {
bb.capacity = capacity
} else if lb, lok := (*b).(*LPBatcher); lok {
lb.capacity = capacity
} else {
slog.Warn("Failed to match Batcher type in WithCapacity. Value not set.")
}

Check warning on line 63 in influxdb3/batching/batcher.go

View check run for this annotation

Codecov / codecov/patch

influxdb3/batching/batcher.go#L62-L63

Added lines #L62 - L63 were not covered by tests
}
}

// WithReadyCallback sets the function called when a new batch is ready. The
// batcher will wait for the callback to finish, so please return as fast as
// possible and move long-running processing to a go-routine.
func WithReadyCallback(f func()) Option {
return func(b *Batcher) {
b.callbackReady = f
return func(b *interface{}) {
if bb, bok := (*b).(*Batcher); bok {
bb.callbackReady = f
} else if lb, lok := (*b).(*LPBatcher); lok {
lb.callbackReady = f
} else {
slog.Warn("Failed to match Batcher type in WithReadyCallback. Callback not set.")
}

Check warning on line 78 in influxdb3/batching/batcher.go

View check run for this annotation

Codecov / codecov/patch

influxdb3/batching/batcher.go#L77-L78

Added lines #L77 - L78 were not covered by tests
}

}

// WithEmitCallback sets the function called when a new batch is ready with the
// batch of points. The batcher will wait for the callback to finish, so please
// return as fast as possible and move long-running processing to a go-routine.
func WithEmitCallback(f func([]*influxdb3.Point)) Option {
return func(b *Batcher) {
b.callbackEmit = f
return func(b *interface{}) {
if bb, bok := (*b).(*Batcher); bok {
bb.callbackEmit = f
} else {
slog.Warn("Failed to match type Batcher in WithEmitPointsCallback. Callback not set.")
}

Check warning on line 92 in influxdb3/batching/batcher.go

View check run for this annotation

Codecov / codecov/patch

influxdb3/batching/batcher.go#L91-L92

Added lines #L91 - L92 were not covered by tests
}
}

Expand All @@ -70,13 +99,18 @@
// DefaultCapacity is the default initial capacity of the point buffer
const DefaultCapacity = 2 * DefaultBatchSize

// Batcher collects points and emits them as batches
type Batcher struct {
type BaseBatcher struct {
size int
capacity int

callbackReady func()
callbackEmit func([]*influxdb3.Point)
}

// Batcher collects points and emits them as batches
type Batcher struct {
BaseBatcher

callbackEmit func([]*influxdb3.Point)

points []*influxdb3.Point
sync.Mutex
Expand All @@ -87,17 +121,21 @@
// initial capacity is DefaultCapacity.
func NewBatcher(options ...Option) *Batcher {
// Set up a batcher with the default values
b := &Batcher{
base := BaseBatcher{
size: DefaultBatchSize,
capacity: DefaultCapacity,
}
b := &Batcher{
BaseBatcher: base,
}

// Apply the options
for _, o := range options {
o(b)
ptr2arg := interface{}(b)
o(&ptr2arg)
}

// Setup the internal data
// setup internal data
b.points = make([]*influxdb3.Point, 0, b.capacity)

return b
Expand All @@ -112,12 +150,22 @@
b.points = append(b.points, p...)

// Call callbacks if a new batch is ready
if b.isReady() {
for b.isReady() {
if b.callbackReady != nil {
b.callbackReady()
}
if b.callbackEmit != nil {
b.callbackEmit(b.emitPoints())
} else {
// no emitter callback
if b.CurrentLoadSize() >= (b.capacity - b.size) {
slog.Warn(
fmt.Sprintf("Batcher is ready, but no callbackEmit is available. "+
"Batcher load is %d points waiting to be emitted.",
b.CurrentLoadSize()),
)
}
break
}
}
}
Expand Down Expand Up @@ -151,3 +199,16 @@

return points
}

// Flush drains every buffered point, even when the buffer currently holds
// more than one batch, and returns them to the caller.
// It does not call the callbackEmit method.
//
// The batcher's mutex is held while the buffer is handed over so that Flush
// cannot interleave with other goroutines that modify the points buffer.
// NOTE(review): the mutex is not reentrant; if callbacks are invoked while the
// lock is held (not visible from here), calling Flush from inside a callback
// would deadlock - confirm against Add.
func (b *Batcher) Flush() []*influxdb3.Point {
	b.Lock()
	defer b.Unlock()

	slog.Info(fmt.Sprintf("Flushing all points (%d) from buffer.", b.CurrentLoadSize()))
	points := b.points
	// Continue with the empty tail behind the returned points, so that later
	// appends can never overwrite the slice that was just handed out.
	b.points = b.points[len(points):]
	return points
}

// CurrentLoadSize reports how many points are waiting in the buffer.
// It does not take the batcher's lock itself.
func (b *Batcher) CurrentLoadSize() int {
	pending := len(b.points)
	return pending
}
53 changes: 52 additions & 1 deletion influxdb3/batching/batcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
package batching

import (
"fmt"
"sync"
"testing"
"time"

"github.com/InfluxCommunity/influxdb3-go/influxdb3"
"github.com/stretchr/testify/assert"
Expand All @@ -51,7 +53,7 @@ func TestCustomValues(t *testing.T) {
assert.Equal(t, capacity, cap(b.points))
}

func TestAddAndEmit(t *testing.T) {
func TestAddAndCallBackEmit(t *testing.T) {
batchSize := 5
emitted := false
var emittedPoints []*influxdb3.Point
Expand Down Expand Up @@ -150,3 +152,52 @@ func TestThreadSafety(t *testing.T) {
assert.Equal(t, 20, emits, "All points should have been emitted")
assert.Empty(t, points, "Remaining points should be emitted correctly")
}

// TestAddLargerThanSize adds, in one call, far more points than a single batch
// holds and checks that every full batch is passed to the emit callback in
// order while the incomplete remainder stays in the buffer.
func TestAddLargerThanSize(t *testing.T) {
	const (
		batchSize  = 5
		loadFactor = 10
		remainder  = 3
		total      = batchSize*loadFactor + remainder
	)

	// loadFactor full batches plus a partial one
	pointSet := make([]*influxdb3.Point, 0, total)
	for i := 0; i < total; i++ {
		pointSet = append(pointSet, influxdb3.NewPoint("test",
			map[string]string{"foo": "bar"},
			map[string]interface{}{"count": i + 1},
			time.Now()))
	}

	var emitCt int
	resultSet := make([]*influxdb3.Point, 0)
	collect := func(points []*influxdb3.Point) {
		emitCt++
		resultSet = append(resultSet, points...)
	}

	b := NewBatcher(
		WithSize(batchSize),
		WithCapacity(batchSize*3),
		WithEmitCallback(collect),
	)
	b.Add(pointSet...)

	// one callback invocation per complete batch
	assert.Equal(t, len(pointSet)/batchSize, emitCt)
	assert.Equal(t, loadFactor*batchSize, len(resultSet))
	fmt.Printf("DEBUG resultSet %d\n", len(resultSet))
	// the partial batch is still buffered, everything before it was emitted
	assert.Equal(t, remainder, len(b.points))
	assert.Equal(t, pointSet[:len(pointSet)-remainder], resultSet)
}

// TestFlush fills a batcher that has no emit callback with several batches
// worth of points and checks that Flush returns all of them and leaves the
// buffer empty.
func TestFlush(t *testing.T) {
	const (
		batchSize  = 5
		loadFactor = 3
		total      = batchSize * loadFactor
	)

	pointSet := make([]*influxdb3.Point, total)
	for i := range pointSet {
		pointSet[i] = influxdb3.NewPoint("test",
			map[string]string{"foo": "bar"},
			map[string]interface{}{"count": i + 1},
			time.Now())
	}

	b := NewBatcher(WithSize(batchSize), WithCapacity(batchSize*2))
	b.Add(pointSet...)
	assert.Equal(t, total, b.CurrentLoadSize())

	flushed := b.Flush()
	assert.Equal(t, total, len(flushed))
	assert.Equal(t, 0, b.CurrentLoadSize())
}
134 changes: 134 additions & 0 deletions influxdb3/batching/lp_batcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package batching

import (
"bytes"
"fmt"
"log/slog"
"sync"
)

// Default sizing of an LPBatcher, in bytes.
const (
	// DefaultBufferSize is the batch size NewLPBatcher uses when no size
	// option is given.
	DefaultBufferSize = 100000
	// DefaultBufferCapacity is the initial capacity NewLPBatcher gives the
	// byte buffer when no capacity option is given.
	DefaultBufferCapacity = 2 * DefaultBufferSize
)

func WithEmitBytesCallback(f func([]byte)) Option {
return func(b *interface{}) {
if lpb, ok := (*b).(*LPBatcher); ok {
lpb.callbackEmit = f
} else {
slog.Warn("Failed to match type LPBatcher in WithEmitBytesCallback. Callback not set.")
}

Check warning on line 19 in influxdb3/batching/lp_batcher.go

View check run for this annotation

Codecov / codecov/patch

influxdb3/batching/lp_batcher.go#L18-L19

Added lines #L18 - L19 were not covered by tests
}
}

// LPBatcher collects lines in a byte buffer and hands them out in batches.
// Its size and capacity settings are counted in bytes.
// (The LP prefix presumably stands for InfluxDB line protocol - confirm.)
type LPBatcher struct {
// BaseBatcher carries the shared settings (size, capacity) and the ready callback.
BaseBatcher

// callbackEmit, when set, receives every batch that Add emits.
callbackEmit func([]byte)

// buffer holds the newline-terminated lines that have not been emitted yet.
buffer []byte
// Mutex guards buffer; Add, Ready and Emit hold it while they run.
sync.Mutex
}

// NewLPBatcher creates an LPBatcher, applies the given options and allocates
// the internal byte buffer.
//
// Without options the batch size is DefaultBufferSize and the initial buffer
// capacity is DefaultBufferCapacity, both in bytes. Every option is called
// with a pointer to an interface value that wraps the new *LPBatcher; nil
// options are skipped.
//
// Values that cannot work are replaced by the defaults and a warning is
// logged: a size of zero or less would make the batcher report ready even
// when it is empty, and a negative capacity cannot be used to allocate the
// buffer.
func NewLPBatcher(options ...func(*interface{})) *LPBatcher {
	l := &LPBatcher{
		BaseBatcher: BaseBatcher{
			size:     DefaultBufferSize,
			capacity: DefaultBufferCapacity,
		},
	}

	// Apply the options
	for _, o := range options {
		if o == nil {
			continue // calling a nil option would panic
		}
		// fresh wrapper per option, so an option that overwrites the
		// interface value cannot affect the options that follow
		ptr2arg := interface{}(l)
		o(&ptr2arg)
	}

	// Reject settings that would break Add or the buffer allocation
	if l.size <= 0 {
		slog.Warn("LPBatcher size must be positive. Using default.",
			"size", l.size, "default", DefaultBufferSize)
		l.size = DefaultBufferSize
	}
	if l.capacity < 0 {
		slog.Warn("LPBatcher capacity must not be negative. Using default.",
			"capacity", l.capacity, "default", DefaultBufferCapacity)
		l.capacity = DefaultBufferCapacity
	}

	// setup internal data
	l.buffer = make([]byte, 0, l.capacity)
	return l
}

// Add stores the given lines in the buffer and then emits batches for as long
// as the batcher reports ready. The lock is held for the whole call, including
// while the callbacks run.
//
// Empty lines are dropped. Every stored line ends with a newline: one is
// appended when the line does not already end with it.
//
// While ready, callbackReady (if set) is called first, then callbackEmit
// receives the next batch. Without a callbackEmit nothing is emitted; a
// warning is logged once the buffered load exceeds capacity minus size, and
// the loop stops.
func (l *LPBatcher) Add(lines ...string) {
	l.Lock()
	defer l.Unlock()

	for i := range lines {
		entry := lines[i]
		if entry == "" { // ignore empty lines
			continue
		}
		l.buffer = append(l.buffer, entry...)
		if last := entry[len(entry)-1]; last != '\n' { // ensure newline demarcation
			l.buffer = append(l.buffer, '\n')
		}
	}

	for l.isReady() {
		if notify := l.callbackReady; notify != nil {
			notify()
		}
		if l.callbackEmit == nil {
			// no emitter callback: the data stays buffered until it is fetched
			if pending := l.CurrentLoadSize(); pending > (l.capacity - l.size) {
				slog.Warn(
					fmt.Sprintf("Batcher is ready, but no callbackEmit is available. "+
						"Batcher load is %d bytes waiting to be emitted.",
						pending),
				)
			}
			return
		}
		l.callbackEmit(l.emitBytes())
	}
}

// Ready reports whether the buffer holds at least one batch worth of bytes.
// The check is made while holding the batcher's lock.
func (l *LPBatcher) Ready() bool {
	l.Lock()
	defer l.Unlock()

	ready := l.isReady()
	return ready
}

// isReady reports whether the number of buffered bytes has reached the batch
// size. It takes no lock; Ready and Add call it with the lock already held.
func (l *LPBatcher) isReady() bool {
	return l.size <= len(l.buffer)
}

// Emit removes the next batch from the buffer and returns it. The lock is held
// for the duration of the call and the choice of bytes is left to emitBytes.
// When less than a full batch is buffered, the remaining bytes are returned,
// so call it at the end of processing to collect what did not fill a batch.
func (l *LPBatcher) Emit() []byte {
	l.Lock()
	defer l.Unlock()

	batch := l.emitBytes()
	return batch
}

// emitBytes removes the next packet from the front of the buffer and returns
// it. It takes no lock; callers hold it.
//
// A packet consists of whole lines only. It ends at the last newline found in
// the first l.size bytes of the buffer. The returned packet does not include
// that final newline, but the newline is always removed from the buffer
// together with the packet, so the next packet starts on a line boundary.
//
// If no newline lies within the first l.size bytes, the whole first line is
// returned even though it is longer than the batch size; a line is never cut
// in the middle. If the buffer holds no newline at all, everything is returned.
//
// Every call on a non-empty buffer consumes at least one byte, which is what
// lets a caller loop "while ready" without spinning. An empty buffer yields an
// empty packet.
func (l *LPBatcher) emitBytes() []byte {
	if len(l.buffer) == 0 {
		return l.buffer
	}

	// a negative size must not be used as a slice bound
	limit := min(l.size, len(l.buffer))
	if limit < 0 {
		limit = 0
	}

	end := bytes.LastIndexByte(l.buffer[:limit], '\n')
	if end < 0 {
		// no complete line fits into one batch: emit the oversized first line
		end = bytes.IndexByte(l.buffer, '\n')
	}
	if end < 0 {
		// no delimiter anywhere: hand out everything that is buffered
		packet := l.buffer
		l.buffer = l.buffer[len(packet):]
		return packet
	}

	packet := l.buffer[:end]
	l.buffer = l.buffer[end+1:] // drop the delimiter along with the packet
	return packet
}

// Flush drains all buffered bytes, even when the buffer currently holds more
// than one batch, and returns them. It does not call callbackEmit.
//
// Like Add, Ready and Emit it holds the batcher's lock, so it cannot interleave
// with a concurrent Add. The mutex is not reentrant: Add runs the callbacks
// while holding it, so Flush must not be called from inside a callback.
func (l *LPBatcher) Flush() []byte {
	l.Lock()
	defer l.Unlock()

	slog.Info(fmt.Sprintf("Flushing all bytes (%d) from buffer.", l.CurrentLoadSize()))
	packet := l.buffer
	// Continue with the empty tail behind the returned bytes, so that later
	// appends can never overwrite the packet that was just handed out.
	l.buffer = l.buffer[len(packet):]
	return packet
}

// CurrentLoadSize reports how many bytes are waiting in the buffer.
// It takes no lock itself; Add calls it while already holding the lock.
func (l *LPBatcher) CurrentLoadSize() int {
	pending := len(l.buffer)
	return pending
}
Loading