Skip to content

Commit

Permalink
fix(array): use-after-free in optimized String array (#5377)
Browse files Browse the repository at this point in the history
* chore: modify tests to exercise use after free bugs

Test that potential use after free bugs can be exercised through
the flux language by modifying executetest.ProcessTestHelper2 to
use a memory allocator that scrubs the memory buffer when freeing.

* fix(array): prevent use-after-free errors with string arrays

The optimised string arrays did not always copy values before
freeing the backing data, leading to potentially unpredictable
behaviour.

Because the default allocator, which is presumably used in most
circumstances, uses the go runtime allocator underneath it is likely
that this wouldn't normally affect the correctness of operations.
The more likely problem is that large memory blocks that the execution
engine thinks has been freed are not being garbage collected because
of refences to strings within the memory block.

The array.StringBuilder is changed to always copy the value into
an internal memory buffer when a value is appended, The internal
buffer will always be allocated from the the memory allocator. When
retriving a string value from array.String the returned value will
be a copy from the internal buffer. This makes the string much
easier to reason about as it fits with the standard go string
semantics. This string is allocated by the go runtime rather than
the memory allocator.

The above changes make the system safer, but are likely to introduce
a reduction in performance due to the increase in copying of data
in memory. In order to provide a faster path some byte slice based
methods have been added to allow for less copying in places where
it can easily be determined that values will be finished with before
the memory is freed.

* feat(array): reduced copy string concatenate

Create custom string addition (concatenation) functions that limit
the amount of copying required for the operation.

* chore(stdlib): reduced memory copying for string columns

In places where it is clearly safe to do so use the byte-slice
oriented string column functions to minimize the amount of data
copies that are made when processing string columns.
  • Loading branch information
mhilton authored Feb 1, 2023
1 parent affc0f7 commit 3d8cd33
Show file tree
Hide file tree
Showing 14 changed files with 241 additions and 109 deletions.
66 changes: 61 additions & 5 deletions array/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package array

import (
"strconv"
"sync/atomic"

"github.com/apache/arrow/go/v7/arrow"
"github.com/apache/arrow/go/v7/arrow/array"
arrowmem "github.com/apache/arrow/go/v7/arrow/memory"
"github.com/influxdata/flux/codes"
"github.com/influxdata/flux/internal/errors"
"github.com/influxdata/flux/memory"
Expand Down Expand Up @@ -102,9 +104,9 @@ type Builder interface {
}

type String struct {
value string
length int
data *array.Binary
value *stringValue
}

// NewStringFromBinaryArray creates an instance of String from
Expand Down Expand Up @@ -162,12 +164,16 @@ func (a *String) Len() int {
func (a *String) Retain() {
if a.data != nil {
a.data.Retain()
return
}
a.value.Retain()
}
func (a *String) Release() {
if a.data != nil {
a.data.Release()
return
}
a.value.Release()
}
func (a *String) Slice(i, j int) Array {
if a.data != nil {
Expand All @@ -177,27 +183,77 @@ func (a *String) Slice(i, j int) Array {
data: array.NewBinaryData(data),
}
}
a.value.Retain()
return &String{
value: a.value,
length: j - i,
}
}
func (a *String) Value(i int) string {

// ValueBytes returns a byte slice containing the value of this string
// at index i. This slice points to the contents of the data buffer and
// is only valid for the lifetime of the array.
func (a *String) ValueBytes(i int) []byte {
if a.data != nil {
return a.data.ValueString(i)
return a.data.Value(i)
}
return a.value
return a.value.Bytes()
}

// Value returns a string copy of the value stored at index i. The
// returned value will outlive the array and is safe to use like any
// other go string. The memory backing the string will be allocated by
// the runtime, rather than any provided allocator.
func (a *String) Value(i int) string {
return string(a.ValueBytes(i))
}
func (a *String) ValueLen(i int) int {
if a.data != nil {
return a.data.ValueLen(i)
}
return len(a.value)
return a.value.Len()
}
func (a *String) IsConstant() bool {
return a.data == nil
}

type stringValue struct {
data []byte

mem arrowmem.Allocator
rc int64
}

func (v *stringValue) Retain() {
if v == nil {
return
}
atomic.AddInt64(&v.rc, 1)
}

func (v *stringValue) Release() {
if v == nil {
return
}
if atomic.AddInt64(&v.rc, -1) == 0 {
v.mem.Free(v.data)
}
}

func (v *stringValue) Bytes() []byte {
if v == nil {
return nil
}
return v.data
}

func (v *stringValue) Len() int {
if v == nil {
return 0
}
return len(v.data)
}

type sliceable interface {
Slice(i, j int) Array
}
Expand Down
4 changes: 2 additions & 2 deletions array/array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func TestString(t *testing.T) {
b.Append("a")
}
},
sz: 0,
sz: 1,
want: []interface{}{
"a", "a", "a", "a", "a",
"a", "a", "a", "a", "a",
Expand Down Expand Up @@ -165,7 +165,7 @@ func TestStringBuilder_NewArray(t *testing.T) {
}

arr := b.NewArray()
mem.AssertSize(t, 0)
assert.Equal(t, 1, mem.CurrentAlloc(), "unexpected memory allocation.")
arr.Release()
mem.AssertSize(t, 0)

Expand Down
57 changes: 0 additions & 57 deletions array/binary.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,63 +189,6 @@ func FloatAddRConst(l *Float, r float64, mem memory.Allocator) (*Float, error) {
return a, nil
}

func StringAdd(l, r *String, mem memory.Allocator) (*String, error) {
n := l.Len()
if n != r.Len() {
return nil, errors.Newf(codes.Invalid, "vectors must have equal length for binary operations")
}
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if l.IsValid(i) && r.IsValid(i) {

b.Append(l.Value(i) + r.Value(i))

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}

func StringAddLConst(l string, r *String, mem memory.Allocator) (*String, error) {
n := r.Len()
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if r.IsValid(i) {

b.Append(l + r.Value(i))

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}

func StringAddRConst(l *String, r string, mem memory.Allocator) (*String, error) {
n := l.Len()
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if l.IsValid(i) {

b.Append(l.Value(i) + r)

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}

func IntSub(l, r *Int, mem memory.Allocator) (*Int, error) {
n := l.Len()
if n != r.Len() {
Expand Down
67 changes: 67 additions & 0 deletions array/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,70 @@ func OrConst(fixed *bool, arr *Boolean, mem memory.Allocator) (*Boolean, error)
b.Release()
return a, nil
}

func StringAdd(l, r *String, mem memory.Allocator) (*String, error) {
n := l.Len()
if n != r.Len() {
return nil, errors.Newf(codes.Invalid, "vectors must have equal length for binary operations")
}
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if l.IsValid(i) && r.IsValid(i) {
lb := l.ValueBytes(i)
rb := r.ValueBytes(i)
buf := make([]byte, len(lb)+len(rb))
copy(buf, lb)
copy(buf[len(lb):], rb)
b.AppendBytes(buf)

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}

func StringAddLConst(l string, r *String, mem memory.Allocator) (*String, error) {
n := r.Len()
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if r.IsValid(i) {
rb := r.ValueBytes(i)
buf := make([]byte, len(l)+len(rb))
copy(buf, l)
copy(buf[len(l):], rb)
b.AppendBytes(buf)

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}

func StringAddRConst(l *String, r string, mem memory.Allocator) (*String, error) {
n := l.Len()
b := NewStringBuilder(mem)
b.Resize(n)
for i := 0; i < n; i++ {
if l.IsValid(i) {
lb := l.ValueBytes(i)
buf := make([]byte, len(lb)+len(r))
copy(buf, lb)
copy(buf[len(lb):], r)
b.AppendBytes(buf)

} else {
b.AppendNull()
}
}
a := b.NewStringArray()
b.Release()
return a, nil
}
2 changes: 1 addition & 1 deletion array/binary.tmpldata
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"Name": "Add",
"Op": "+",
"Types": ["Int", "Uint", "Float", "String"]
"Types": ["Int", "Uint", "Float"]
},
{
"Name": "Sub",
Expand Down
Loading

0 comments on commit 3d8cd33

Please sign in to comment.