Skip to content

Commit

Permalink
exec: Update NULL handling in Vec Copy
Browse files Browse the repository at this point in the history
The copy method in Vec previously unset ALL NULL values in the
destination vector, and then proceeded to set the appropriate
NULL values from the source vector, which is incorrect. To address
this problem, this commit adds a UnsetNullRange operation to
the Nulls object. The operation unsets all null values within an
input range, but is unable to smartly/efficiently identify when
the vector no longer contains any null values. The best way
to address this problem can be left to future PR's and discussion.

Release note: None
  • Loading branch information
rohany committed Jul 3, 2019
1 parent 3c0aa0b commit e7ee473
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 22 deletions.
36 changes: 36 additions & 0 deletions pkg/sql/exec/coldata/nulls.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,42 @@ func (n *Nulls) SetNullRange(start uint64, end uint64) {
}
}

// UnsetNullRange unsets all the nulls in the range [start, end).
func (n *Nulls) UnsetNullRange(start, end uint64) {
if start >= end {
return
}

sIdx := start / 8
eIdx := (end - 1) / 8

// Case where mask only spans one byte.
if sIdx == eIdx {
mask := onesMask << (start % 8)
if end%8 != 0 {
mask = mask & (onesMask >> (8 - (end % 8)))
}
n.nulls[sIdx] |= mask
return
}

// Case where mask spans at least two bytes.
if sIdx < eIdx {
mask := onesMask << (start % 8)
n.nulls[sIdx] |= mask
if end%8 == 0 {
n.nulls[eIdx] = onesMask
} else {
mask = onesMask >> (8 - (end % 8))
n.nulls[eIdx] |= mask
}

for i := sIdx + 1; i < eIdx; i++ {
n.nulls[i] = onesMask
}
}
}

// Truncate sets all values greater than start to null.
func (n *Nulls) Truncate(start uint16) {
end := uint64(len(n.nulls) * 8)
Expand Down
15 changes: 15 additions & 0 deletions pkg/sql/exec/coldata/nulls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ func TestSetNullRange(t *testing.T) {
}
}

func TestUnsetNullRange(t *testing.T) {
for _, start := range pos {
for _, end := range pos {
n := NewNulls(BatchSize)
n.SetNulls()
n.UnsetNullRange(start, end)
for i := uint64(0); i < BatchSize; i++ {
expected := i >= start && i < end
require.NotEqual(t, expected, n.NullAt64(i),
"NullAt(%d) saw %t, expected %t, after SetNullRange(%d, %d)", i, n.NullAt64(i), !expected, start, end)
}
}
}
}

func TestNullsTruncate(t *testing.T) {
for _, size := range pos {
n := NewNulls(BatchSize)
Expand Down
52 changes: 52 additions & 0 deletions pkg/sql/exec/coldata/vec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,58 @@ func TestCopy(t *testing.T) {
}
}

func TestCopyNulls(t *testing.T) {
const typ = types.Int64

// Set up the destination vector.
dst := NewMemColumn(typ, BatchSize)
dstInts := dst.Int64()
for i := range dstInts {
dstInts[i] = int64(1)
}
// Set some nulls in the destination vector.
for i := 0; i < 5; i++ {
dst.Nulls().SetNull(uint16(i))
}

// Set up the source vector.
src := NewMemColumn(typ, BatchSize)
srcInts := src.Int64()
for i := range srcInts {
srcInts[i] = 2
}
// Set some nulls in the source.
for i := 3; i < 8; i++ {
src.Nulls().SetNull(uint16(i))
}

copyArgs := CopyArgs{
ColType: typ,
Src: src,
DestIdx: 3,
SrcStartIdx: 3,
SrcEndIdx: 10,
}

dst.Copy(copyArgs)

// Verify that original nulls arent deleted, and that
// the nulls in the source have been copied over.
for i := 0; i < 8; i++ {
require.True(t, dst.Nulls().NullAt(uint16(i)), "expected null at %d, found not null", i)
}

// Verify that the data from src has been copied over.
for i := 8; i < 10; i++ {
require.True(t, dstInts[i] == 2, "data from src was not copied over")
}

// Verify that the remaining elements in dst have not been touched.
for i := 10; i < BatchSize; i++ {
require.True(t, dstInts[i] == 1, "data in dst outside copy range has been changed")
}
}

func BenchmarkAppend(b *testing.B) {
const typ = types.Int64

Expand Down
30 changes: 8 additions & 22 deletions pkg/sql/exec/coldata/vec_tmpl.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,11 @@ func (m *memColumn) Append(args AppendArgs) {
}

func (m *memColumn) Copy(args CopyArgs) {
if args.DestIdx != 0 && m.HasNulls() {
panic("copying to non-zero dest index with nulls is not implemented yet (would overwrite nulls)")
}
if args.Nils != nil && args.Sel64 == nil {
panic("Nils set without Sel64")
}
// TODO(asubiotto): This is extremely wrong (we might be overwriting nulls
// past the end of where we are copying to that should be left alone).
// Previous code did this though so we won't be introducing new problems. We
// really have to fix and test this.
m.Nulls().UnsetNulls()

m.Nulls().UnsetNullRange(args.DestIdx, args.DestIdx+(args.SrcEndIdx-args.SrcStartIdx))

switch args.ColType {
// {{range .}}
Expand Down Expand Up @@ -152,21 +146,13 @@ func (m *memColumn) Copy(args CopyArgs) {
}
// No Sel or Sel64.
copy(toCol[args.DestIdx:], fromCol[args.SrcStartIdx:args.SrcEndIdx])
// We do not check for existence of nulls in m due to forcibly unsetting
// the bitmap at the start.
if args.Src.HasNulls() {
m.nulls.hasNulls = true
if args.DestIdx == 0 && args.SrcStartIdx == 0 {
// We can copy this bitmap indiscriminately.
copy(m.nulls.nulls, args.Src.Nulls().NullBitmap())
} else {
// TODO(asubiotto): This should use Extend but Extend only takes uint16
// arguments.
srcNulls := args.Src.Nulls()
for curDestIdx, curSrcIdx := args.DestIdx, args.SrcStartIdx; curSrcIdx < args.SrcEndIdx; curDestIdx, curSrcIdx = curDestIdx+1, curSrcIdx+1 {
if srcNulls.NullAt64(curSrcIdx) {
m.nulls.SetNull64(curDestIdx)
}
// TODO(asubiotto): This should use Extend but Extend only takes uint16
// arguments.
srcNulls := args.Src.Nulls()
for curDestIdx, curSrcIdx := args.DestIdx, args.SrcStartIdx; curSrcIdx < args.SrcEndIdx; curDestIdx, curSrcIdx = curDestIdx+1, curSrcIdx+1 {
if srcNulls.NullAt64(curSrcIdx) {
m.nulls.SetNull64(curDestIdx)
}
}
}
Expand Down

0 comments on commit e7ee473

Please sign in to comment.