From af650d0a54ed09b1def0726959ce225dd75af605 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Tue, 16 Oct 2018 16:54:45 +0800 Subject: [PATCH 01/10] util: add PreAlloc4Row and Insert for Chunk and List --- util/chunk/chunk.go | 55 +++++++++++++++++++++ util/chunk/chunk_test.go | 103 +++++++++++++++++++++++++++++++++++++++ util/chunk/list.go | 23 +++++++++ util/chunk/list_test.go | 43 ++++++++++++++++ 4 files changed, 224 insertions(+) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index d09150f1ab84b..a35722c3cb204 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -15,6 +15,7 @@ package chunk import ( "encoding/binary" + "reflect" "unsafe" "github.com/cznic/mathutil" @@ -277,6 +278,60 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } +// PreAlloc4Row pre-allocates the memory space for a Row. +// The null elem info will be pre-written. +func (c *Chunk) PreAlloc4Row(row Row) { + for i, rowCol := range row.c.columns { + chkCol := c.columns[i] + chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + if rowCol.isFixed() { + elemLen := len(rowCol.elemBuf) + if len(chkCol.data)+elemLen >= cap(chkCol.data) { + chkCol.data = make([]byte, len(chkCol.data)+elemLen, 2*cap(chkCol.data)) + } else { + (*reflect.SliceHeader)(unsafe.Pointer(&chkCol.data)).Len = len(chkCol.data) + elemLen + } + } else { + elemLen := int(rowCol.offsets[row.idx+1] - rowCol.offsets[row.idx]) + if len(chkCol.data)+elemLen >= cap(chkCol.data) { + chkCol.data = make([]byte, len(chkCol.data)+elemLen, 2*cap(chkCol.data)) + } else { + (*reflect.SliceHeader)(unsafe.Pointer(&chkCol.data)).Len = len(chkCol.data) + elemLen + } + chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + } + chkCol.length++ + } + c.numVirtualRows++ +} + +// Insert inserts row on rowIdx. +// Note: Insert will cover the origin data, it should be called after +// PreAlloc4Row. +func (c *Chunk) Insert(rowIdx int, row Row) { + // Check data length between row and the origin data for every column. + // Cover the origin data if the upper check is valid. + for i, rowCol := range row.c.columns { + chkCol := c.columns[i] + if chkCol.isFixed() != rowCol.isFixed() { + panic("unexcepted error happens during Chunk.Insert") + } + var srcStart, srcEnd, destStart, destEnd int + if rowCol.isFixed() { + srcElemLen, destElemLen := len(rowCol.elemBuf), len(chkCol.elemBuf) + srcStart, destStart = row.idx*srcElemLen, rowIdx*destElemLen + srcEnd, destEnd = srcStart+srcElemLen, destStart+destElemLen + } else { + srcStart, srcEnd = int(rowCol.offsets[row.idx]), int(rowCol.offsets[row.idx+1]) + destStart, destEnd = int(chkCol.offsets[rowIdx]), int(chkCol.offsets[rowIdx+1]) + } + if destEnd-destStart != srcEnd-srcStart { + panic("unexcepted error happens during Chunk.Insert") + } + copy(chkCol.data[destStart:destEnd], rowCol.data[srcStart:srcEnd]) + } +} + // Append appends rows in [begin, end) in another Chunk to a Chunk. func (c *Chunk) Append(other *Chunk, begin, end int) { for colID, src := range other.columns { diff --git a/util/chunk/chunk_test.go b/util/chunk/chunk_test.go index 799a6e703ca64..b1b679e982bfc 100644 --- a/util/chunk/chunk_test.go +++ b/util/chunk/chunk_test.go @@ -18,6 +18,8 @@ import ( "fmt" "math" "strconv" + "strings" + "sync" "testing" "time" "unsafe" @@ -517,6 +519,107 @@ func (s *testChunkSuite) TestSwapColumn(c *check.C) { checkRef() } +func (s *testChunkSuite) TestPreAlloc4RowAndInsert(c *check.C) { + fieldTypes := make([]*types.FieldType, 0, 4) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeFloat}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeNewDecimal}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeVarchar}) + + srcChk := NewChunkWithCapacity(fieldTypes, 10) + for i := int64(0); i < 10; i++ { + srcChk.AppendFloat32(0, float32(i)) + srcChk.AppendInt64(1, i) + srcChk.AppendMyDecimal(2, types.NewDecFromInt(i)) + srcChk.AppendString(3, strings.Repeat(strconv.FormatInt(i, 10), int(i))) + } + + destChk := NewChunkWithCapacity(fieldTypes, 3) + + // Test Chunk.PreAlloc4Row. + for i := 0; i < srcChk.NumRows(); i++ { + c.Assert(destChk.NumRows(), check.Equals, i) + destChk.PreAlloc4Row(srcChk.GetRow(i)) + } + for i, srcCol := range srcChk.columns { + destCol := destChk.columns[i] + c.Assert(len(srcCol.elemBuf), check.Equals, len(destCol.elemBuf)) + c.Assert(len(srcCol.data), check.Equals, len(destCol.data)) + c.Assert(len(srcCol.offsets), check.Equals, len(destCol.offsets)) + c.Assert(len(srcCol.nullBitmap), check.Equals, len(destCol.nullBitmap)) + c.Assert(srcCol.length, check.Equals, destCol.length) + c.Assert(srcCol.nullCount, check.Equals, destCol.nullCount) + + for _, val := range destCol.data { + c.Assert(val == 0, check.IsTrue) + } + for j, val := range srcCol.offsets { + c.Assert(val, check.Equals, destCol.offsets[j]) + } + for j, val := range srcCol.nullBitmap { + c.Assert(val, check.Equals, destCol.nullBitmap[j]) + } + for _, val := range destCol.elemBuf { + c.Assert(val == 0, check.IsTrue) + } + } + + // Test Chunk.Insert. + for i := srcChk.NumRows() - 1; i >= 0; i-- { + destChk.Insert(i, srcChk.GetRow(i)) + } + for i, srcCol := range srcChk.columns { + destCol := destChk.columns[i] + + for j, val := range srcCol.data { + c.Assert(val, check.Equals, destCol.data[j]) + } + for j, val := range srcCol.offsets { + c.Assert(val, check.Equals, destCol.offsets[j]) + } + for j, val := range srcCol.nullBitmap { + c.Assert(val, check.Equals, destCol.nullBitmap[j]) + } + for _, val := range destCol.elemBuf { + c.Assert(val == 0, check.IsTrue) + } + } + + // Test parallel Chunk.Insert. + destChk.Reset() + startWg, endWg := &sync.WaitGroup{}, &sync.WaitGroup{} + startWg.Add(1) + for i := 0; i < srcChk.NumRows(); i++ { + destChk.PreAlloc4Row(srcChk.GetRow(i)) + endWg.Add(1) + go func(rowIdx int) { + defer func() { + endWg.Done() + }() + startWg.Wait() + destChk.Insert(rowIdx, srcChk.GetRow(rowIdx)) + }(i) + } + startWg.Done() + endWg.Wait() + for i, srcCol := range srcChk.columns { + destCol := destChk.columns[i] + + for j, val := range srcCol.data { + c.Assert(val, check.Equals, destCol.data[j]) + } + for j, val := range srcCol.offsets { + c.Assert(val, check.Equals, destCol.offsets[j]) + } + for j, val := range srcCol.nullBitmap { + c.Assert(val, check.Equals, destCol.nullBitmap[j]) + } + for _, val := range destCol.elemBuf { + c.Assert(val == 0, check.IsTrue) + } + } +} + func BenchmarkAppendInt(b *testing.B) { b.ReportAllocs() chk := newChunk(8) diff --git a/util/chunk/list.go b/util/chunk/list.go index da789211d5a0d..d286502a473ef 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -140,6 +140,29 @@ func (l *List) Reset() { l.consumedIdx = -1 } +// PreAlloc4Row pre-allocate the storage memory for a Row. +func (l *List) PreAlloc4Row(row Row) (ptr RowPtr) { + chkIdx := len(l.chunks) - 1 + if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() || chkIdx == l.consumedIdx { + newChk := l.allocChunk() + l.chunks = append(l.chunks, newChk) + if chkIdx != l.consumedIdx { + l.memTracker.Consume(l.chunks[chkIdx].MemoryUsage()) + l.consumedIdx = chkIdx + } + chkIdx++ + } + chk := l.chunks[chkIdx] + rowIdx := chk.NumRows() + chk.PreAlloc4Row(row) + l.length++ + return RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)} +} + +func (l *List) Insert(ptr RowPtr, row Row) { + l.chunks[ptr.ChkIdx].Insert(int(ptr.RowIdx), row) +} + // ListWalkFunc is used to walk the list. // If error is returned, it will stop walking. type ListWalkFunc = func(row Row) error diff --git a/util/chunk/list_test.go b/util/chunk/list_test.go index 646812331ceb8..59ec4ca98e5bb 100644 --- a/util/chunk/list_test.go +++ b/util/chunk/list_test.go @@ -22,6 +22,8 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" + "strconv" + "strings" ) func (s *testChunkSuite) TestList(c *check.C) { @@ -114,6 +116,47 @@ func (s *testChunkSuite) TestListMemoryUsage(c *check.C) { c.Assert(list.GetMemTracker().BytesConsumed(), check.Equals, memUsage+srcChk.MemoryUsage()) } +func (s *testChunkSuite) TestListPrePreAlloc4RowAndInsert(c *check.C) { + fieldTypes := make([]*types.FieldType, 0, 4) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeFloat}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeLonglong}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeNewDecimal}) + fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeVarchar}) + + srcChk := NewChunkWithCapacity(fieldTypes, 10) + for i := int64(0); i < 10; i++ { + srcChk.AppendFloat32(0, float32(i)) + srcChk.AppendInt64(1, i) + srcChk.AppendMyDecimal(2, types.NewDecFromInt(i)) + srcChk.AppendString(3, strings.Repeat(strconv.FormatInt(i, 10), int(i))) + } + + srcList := NewList(fieldTypes, 3, 3) + destList := NewList(fieldTypes, 5, 5) + destRowPtr := make([]RowPtr, srcChk.NumRows()) + for i := 0; i < srcChk.NumRows(); i++ { + srcList.AppendRow(srcChk.GetRow(i)) + destRowPtr[i] = destList.PreAlloc4Row(srcChk.GetRow(i)) + } + + c.Assert(srcList.NumChunks(), check.Equals, 4) + c.Assert(destList.NumChunks(), check.Equals, 2) + + iter4Src := NewIterator4List(srcList) + for row, i := iter4Src.Begin(), 0; row != iter4Src.End(); row, i = iter4Src.Next(), i+1 { + destList.Insert(destRowPtr[i], row) + } + + iter4Dest := NewIterator4List(destList) + srcRow, destRow := iter4Src.Begin(), iter4Dest.Begin() + for ; srcRow != iter4Src.End(); srcRow, destRow = iter4Src.Next(), iter4Dest.Next() { + c.Assert(srcRow.GetFloat32(0), check.Equals, destRow.GetFloat32(0)) + c.Assert(srcRow.GetInt64(1), check.Equals, destRow.GetInt64(1)) + c.Assert(srcRow.GetMyDecimal(2).Compare(destRow.GetMyDecimal(2)) == 0, check.IsTrue) + c.Assert(srcRow.GetString(3), check.Equals, destRow.GetString(3)) + } +} + func BenchmarkListMemoryUsage(b *testing.B) { fieldTypes := make([]*types.FieldType, 0, 4) fieldTypes = append(fieldTypes, &types.FieldType{Tp: mysql.TypeFloat}) From 577f6763d8e5d5b79c29a6bc219dfa3fdedfd624 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Tue, 16 Oct 2018 17:10:37 +0800 Subject: [PATCH 02/10] make golint happy --- util/chunk/chunk.go | 2 +- util/chunk/list.go | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index a35722c3cb204..bb191ed69b4f6 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -305,7 +305,7 @@ func (c *Chunk) PreAlloc4Row(row Row) { c.numVirtualRows++ } -// Insert inserts row on rowIdx. +// Insert inserts `row` on the position specified by `rowIdx`. // Note: Insert will cover the origin data, it should be called after // PreAlloc4Row. func (c *Chunk) Insert(rowIdx int, row Row) { diff --git a/util/chunk/list.go b/util/chunk/list.go index d286502a473ef..f49383cd95974 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -159,6 +159,9 @@ func (l *List) PreAlloc4Row(row Row) (ptr RowPtr) { return RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)} } +// Insert inserts `row` on the position specified by `ptr`. +// Note: Insert will cover the origin data, it should be called after +// PreAlloc4Row. func (l *List) Insert(ptr RowPtr, row Row) { l.chunks[ptr.ChkIdx].Insert(int(ptr.RowIdx), row) } From 837a517d9c452411b5d432d50a64cbd21bb1b219 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 11:53:00 +0800 Subject: [PATCH 03/10] address comment --- util/chunk/chunk.go | 46 +++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index bb191ed69b4f6..fb24573a078f6 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -279,30 +279,40 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } // PreAlloc4Row pre-allocates the memory space for a Row. -// The null elem info will be pre-written. +// The nullBitMap of c.columns will be pre-written. func (c *Chunk) PreAlloc4Row(row Row) { - for i, rowCol := range row.c.columns { - chkCol := c.columns[i] - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) - if rowCol.isFixed() { - elemLen := len(rowCol.elemBuf) - if len(chkCol.data)+elemLen >= cap(chkCol.data) { - chkCol.data = make([]byte, len(chkCol.data)+elemLen, 2*cap(chkCol.data)) + for i, srcCol := range row.c.columns { + dstCol := c.columns[i] + dstCol.appendNullBitmap(!srcCol.isNull(row.idx)) + elemLen := len(srcCol.elemBuf) + if !srcCol.isFixed() { + elemLen = int(srcCol.offsets[row.idx+1] - srcCol.offsets[row.idx]) + dstCol.offsets = append(dstCol.offsets, int32(len(dstCol.data)+elemLen)) + } + if needCap := len(dstCol.data) + elemLen; needCap > cap(dstCol.data) { + // Grow the capacity according to golang.growslice. + newCap := cap(dstCol.data) + doubleCap := newCap << 1 + if needCap > doubleCap { + newCap = needCap } else { - (*reflect.SliceHeader)(unsafe.Pointer(&chkCol.data)).Len = len(chkCol.data) + elemLen + if len(dstCol.data) < 1024 { + newCap = doubleCap + } else { + for 0 < newCap && newCap < needCap { + newCap += newCap / 4 + } + if newCap <= 0 { + newCap = needCap + } + } } + dstCol.data = make([]byte, len(dstCol.data)+elemLen, newCap) } else { - elemLen := int(rowCol.offsets[row.idx+1] - rowCol.offsets[row.idx]) - if len(chkCol.data)+elemLen >= cap(chkCol.data) { - chkCol.data = make([]byte, len(chkCol.data)+elemLen, 2*cap(chkCol.data)) - } else { - (*reflect.SliceHeader)(unsafe.Pointer(&chkCol.data)).Len = len(chkCol.data) + elemLen - } - chkCol.offsets = append(chkCol.offsets, int32(len(chkCol.data))) + (*reflect.SliceHeader)(unsafe.Pointer(&dstCol.data)).Len = len(dstCol.data) + elemLen } - chkCol.length++ + dstCol.length++ } - c.numVirtualRows++ } // Insert inserts `row` on the position specified by `rowIdx`. From 6eb77ad9d47004735811458cd15e921856f2b118 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 17:07:59 +0800 Subject: [PATCH 04/10] address comment --- util/chunk/chunk.go | 63 +++++++++++++++++++++------------------------ util/chunk/list.go | 7 +++-- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index fb24573a078f6..e4e8077a3c0dc 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -15,12 +15,12 @@ package chunk import ( "encoding/binary" - "reflect" "unsafe" "github.com/cznic/mathutil" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" + "reflect" ) // Chunk stores multiple rows of data in Apache Arrow format. @@ -279,7 +279,7 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } // PreAlloc4Row pre-allocates the memory space for a Row. -// The nullBitMap of c.columns will be pre-written. +// Nothing except for the nullBitMap of c.columns will be pre-written. func (c *Chunk) PreAlloc4Row(row Row) { for i, srcCol := range row.c.columns { dstCol := c.columns[i] @@ -289,29 +289,30 @@ func (c *Chunk) PreAlloc4Row(row Row) { elemLen = int(srcCol.offsets[row.idx+1] - srcCol.offsets[row.idx]) dstCol.offsets = append(dstCol.offsets, int32(len(dstCol.data)+elemLen)) } - if needCap := len(dstCol.data) + elemLen; needCap > cap(dstCol.data) { - // Grow the capacity according to golang.growslice. - newCap := cap(dstCol.data) - doubleCap := newCap << 1 - if needCap > doubleCap { - newCap = needCap + dstCol.length++ + needCap := len(dstCol.data) + elemLen + if needCap <= cap(dstCol.data) { + (*reflect.SliceHeader)(unsafe.Pointer(&dstCol.data)).Len = len(dstCol.data) + elemLen + continue + } + // Grow the capacity according to golang.growslice. + newCap := cap(dstCol.data) + doubleCap := newCap << 1 + if needCap > doubleCap { + newCap = needCap + } else { + if len(dstCol.data) < 1024 { + newCap = doubleCap } else { - if len(dstCol.data) < 1024 { - newCap = doubleCap - } else { - for 0 < newCap && newCap < needCap { - newCap += newCap / 4 - } - if newCap <= 0 { - newCap = needCap - } + for 0 < newCap && newCap < needCap { + newCap += newCap / 4 + } + if newCap <= 0 { + newCap = needCap } } - dstCol.data = make([]byte, len(dstCol.data)+elemLen, newCap) - } else { - (*reflect.SliceHeader)(unsafe.Pointer(&dstCol.data)).Len = len(dstCol.data) + elemLen } - dstCol.length++ + dstCol.data = make([]byte, len(dstCol.data)+elemLen, newCap) } } @@ -321,24 +322,18 @@ func (c *Chunk) PreAlloc4Row(row Row) { func (c *Chunk) Insert(rowIdx int, row Row) { // Check data length between row and the origin data for every column. // Cover the origin data if the upper check is valid. - for i, rowCol := range row.c.columns { - chkCol := c.columns[i] - if chkCol.isFixed() != rowCol.isFixed() { - panic("unexcepted error happens during Chunk.Insert") - } + for i, srcCol := range row.c.columns { + dstCol := c.columns[i] var srcStart, srcEnd, destStart, destEnd int - if rowCol.isFixed() { - srcElemLen, destElemLen := len(rowCol.elemBuf), len(chkCol.elemBuf) + if srcCol.isFixed() { + srcElemLen, destElemLen := len(srcCol.elemBuf), len(dstCol.elemBuf) srcStart, destStart = row.idx*srcElemLen, rowIdx*destElemLen srcEnd, destEnd = srcStart+srcElemLen, destStart+destElemLen } else { - srcStart, srcEnd = int(rowCol.offsets[row.idx]), int(rowCol.offsets[row.idx+1]) - destStart, destEnd = int(chkCol.offsets[rowIdx]), int(chkCol.offsets[rowIdx+1]) - } - if destEnd-destStart != srcEnd-srcStart { - panic("unexcepted error happens during Chunk.Insert") + srcStart, srcEnd = int(srcCol.offsets[row.idx]), int(srcCol.offsets[row.idx+1]) + destStart, destEnd = int(dstCol.offsets[rowIdx]), int(dstCol.offsets[rowIdx+1]) } - copy(chkCol.data[destStart:destEnd], rowCol.data[srcStart:srcEnd]) + copy(dstCol.data[destStart:destEnd], srcCol.data[srcStart:srcEnd]) } } diff --git a/util/chunk/list.go b/util/chunk/list.go index f49383cd95974..34431e8bbaf27 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -140,10 +140,13 @@ func (l *List) Reset() { l.consumedIdx = -1 } -// PreAlloc4Row pre-allocate the storage memory for a Row. +// PreAlloc4Row pre-allocates the storage memory for a Row. +// Note: this function will *ONLY* allocate the needed memory for `row`, the +// data will *NOT* be written into the List. List.Insert can be called to write +// the data later on. func (l *List) PreAlloc4Row(row Row) (ptr RowPtr) { chkIdx := len(l.chunks) - 1 - if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() || chkIdx == l.consumedIdx { + if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() { newChk := l.allocChunk() l.chunks = append(l.chunks, newChk) if chkIdx != l.consumedIdx { From f91ebc055c5f704fce5cddd4a9b38e2c78a500fe Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 17:09:40 +0800 Subject: [PATCH 05/10] tiny change --- util/chunk/chunk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index e4e8077a3c0dc..af16b5acc906b 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -15,12 +15,12 @@ package chunk import ( "encoding/binary" + "reflect" "unsafe" "github.com/cznic/mathutil" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" - "reflect" ) // Chunk stores multiple rows of data in Apache Arrow format. From 79e6eb036cf77cbefb7db8dbca15d2690567e68f Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 17:10:43 +0800 Subject: [PATCH 06/10] tiny change --- util/chunk/list_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/chunk/list_test.go b/util/chunk/list_test.go index 59ec4ca98e5bb..81ac3f3b29d52 100644 --- a/util/chunk/list_test.go +++ b/util/chunk/list_test.go @@ -15,6 +15,8 @@ package chunk import ( "math" + "strconv" + "strings" "testing" "time" @@ -22,8 +24,6 @@ import ( "github.com/pingcap/tidb/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" - "strconv" - "strings" ) func (s *testChunkSuite) TestList(c *check.C) { From 52497c090c5e81ab847329170362a97887dff7c3 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 17:12:57 +0800 Subject: [PATCH 07/10] remove useless comment --- util/chunk/chunk.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index af16b5acc906b..0996fb8c04d83 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -320,8 +320,6 @@ func (c *Chunk) PreAlloc4Row(row Row) { // Note: Insert will cover the origin data, it should be called after // PreAlloc4Row. func (c *Chunk) Insert(rowIdx int, row Row) { - // Check data length between row and the origin data for every column. - // Cover the origin data if the upper check is valid. for i, srcCol := range row.c.columns { dstCol := c.columns[i] var srcStart, srcEnd, destStart, destEnd int From b73c2e96e774817a4edb97a5007c32d691205afb Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 17:35:26 +0800 Subject: [PATCH 08/10] address comment --- util/chunk/chunk.go | 15 +++++++++++---- util/chunk/chunk_test.go | 6 +++--- util/chunk/list.go | 6 +++--- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 0996fb8c04d83..20720b67cf34d 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -278,9 +278,13 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { } } -// PreAlloc4Row pre-allocates the memory space for a Row. -// Nothing except for the nullBitMap of c.columns will be pre-written. -func (c *Chunk) PreAlloc4Row(row Row) { +// PreAlloc pre-allocates the memory space in a Chunk to store the Row. +// NOTE: +// 1. The Chunk must be empty or holds no useful data. +// 2. The schema of the Row must be the same with the Chunk. +// 3. This API is paired with the `Insert()` function, which inserts all the +// rows data into the Chunk after the pre-allocation. +func (c *Chunk) PreAlloc(row Row) { for i, srcCol := range row.c.columns { dstCol := c.columns[i] dstCol.appendNullBitmap(!srcCol.isNull(row.idx)) @@ -318,9 +322,12 @@ func (c *Chunk) PreAlloc4Row(row Row) { // Insert inserts `row` on the position specified by `rowIdx`. // Note: Insert will cover the origin data, it should be called after -// PreAlloc4Row. +// PreAlloc. func (c *Chunk) Insert(rowIdx int, row Row) { for i, srcCol := range row.c.columns { + if row.IsNull(i) { + continue + } dstCol := c.columns[i] var srcStart, srcEnd, destStart, destEnd int if srcCol.isFixed() { diff --git a/util/chunk/chunk_test.go b/util/chunk/chunk_test.go index b1b679e982bfc..4edce4b9f35ea 100644 --- a/util/chunk/chunk_test.go +++ b/util/chunk/chunk_test.go @@ -536,10 +536,10 @@ func (s *testChunkSuite) TestPreAlloc4RowAndInsert(c *check.C) { destChk := NewChunkWithCapacity(fieldTypes, 3) - // Test Chunk.PreAlloc4Row. + // Test Chunk.PreAlloc. for i := 0; i < srcChk.NumRows(); i++ { c.Assert(destChk.NumRows(), check.Equals, i) - destChk.PreAlloc4Row(srcChk.GetRow(i)) + destChk.PreAlloc(srcChk.GetRow(i)) } for i, srcCol := range srcChk.columns { destCol := destChk.columns[i] @@ -590,7 +590,7 @@ func (s *testChunkSuite) TestPreAlloc4RowAndInsert(c *check.C) { startWg, endWg := &sync.WaitGroup{}, &sync.WaitGroup{} startWg.Add(1) for i := 0; i < srcChk.NumRows(); i++ { - destChk.PreAlloc4Row(srcChk.GetRow(i)) + destChk.PreAlloc(srcChk.GetRow(i)) endWg.Add(1) go func(rowIdx int) { defer func() { diff --git a/util/chunk/list.go b/util/chunk/list.go index 34431e8bbaf27..ea130c0e61256 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -140,7 +140,7 @@ func (l *List) Reset() { l.consumedIdx = -1 } -// PreAlloc4Row pre-allocates the storage memory for a Row. +// PreAlloc pre-allocates the storage memory for a Row. // Note: this function will *ONLY* allocate the needed memory for `row`, the // data will *NOT* be written into the List. List.Insert can be called to write // the data later on. @@ -157,14 +157,14 @@ func (l *List) PreAlloc4Row(row Row) (ptr RowPtr) { } chk := l.chunks[chkIdx] rowIdx := chk.NumRows() - chk.PreAlloc4Row(row) + chk.PreAlloc(row) l.length++ return RowPtr{ChkIdx: uint32(chkIdx), RowIdx: uint32(rowIdx)} } // Insert inserts `row` on the position specified by `ptr`. // Note: Insert will cover the origin data, it should be called after -// PreAlloc4Row. +// PreAlloc. func (l *List) Insert(ptr RowPtr, row Row) { l.chunks[ptr.ChkIdx].Insert(int(ptr.RowIdx), row) } From 0ac64764f8e43b0763dcec1e4944defa33dc11fe Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 18:02:42 +0800 Subject: [PATCH 09/10] fix ci --- util/chunk/list.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/list.go b/util/chunk/list.go index ea130c0e61256..1f1e167c2be87 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -140,7 +140,7 @@ func (l *List) Reset() { l.consumedIdx = -1 } -// PreAlloc pre-allocates the storage memory for a Row. +// PreAlloc4Row pre-allocates the storage memory for a Row. // Note: this function will *ONLY* allocate the needed memory for `row`, the // data will *NOT* be written into the List. List.Insert can be called to write // the data later on. From f5e40885a64c65206bc99a7ee062cb2586ab9072 Mon Sep 17 00:00:00 2001 From: xuhuaiyu <391585975@qq.com> Date: Thu, 18 Oct 2018 19:45:25 +0800 Subject: [PATCH 10/10] address comment --- util/chunk/chunk.go | 4 ++++ util/chunk/list.go | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 20720b67cf34d..3b91cf3bb693d 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -284,6 +284,10 @@ func (c *Chunk) AppendPartialRow(colIdx int, row Row) { // 2. The schema of the Row must be the same with the Chunk. // 3. This API is paired with the `Insert()` function, which inserts all the // rows data into the Chunk after the pre-allocation. +// 4. We set the null bitmap here instead of in the Insert() function because +// when the Insert() function is called parallelly, the data race on a byte +// can not be avoided although the manipulated bits are different inside a +// byte. func (c *Chunk) PreAlloc(row Row) { for i, srcCol := range row.c.columns { dstCol := c.columns[i] diff --git a/util/chunk/list.go b/util/chunk/list.go index 1f1e167c2be87..4acee7d199216 100644 --- a/util/chunk/list.go +++ b/util/chunk/list.go @@ -141,9 +141,11 @@ func (l *List) Reset() { } // PreAlloc4Row pre-allocates the storage memory for a Row. -// Note: this function will *ONLY* allocate the needed memory for `row`, the -// data will *NOT* be written into the List. List.Insert can be called to write -// the data later on. +// NOTE: +// 1. The List must be empty or holds no useful data. +// 2. The schema of the Row must be the same with the List. +// 3. This API is paired with the `Insert()` function, which inserts all the +// rows data into the List after the pre-allocation. func (l *List) PreAlloc4Row(row Row) (ptr RowPtr) { chkIdx := len(l.chunks) - 1 if chkIdx == -1 || l.chunks[chkIdx].NumRows() >= l.chunks[chkIdx].Capacity() {