Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
improve baseDeltaBlockPostings
Browse files Browse the repository at this point in the history
Signed-off-by: naivewong <867245430@qq.com>
  • Loading branch information
naivewong committed Jun 28, 2019
1 parent ce55654 commit b3f2b5e
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 120 deletions.
2 changes: 1 addition & 1 deletion index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
return n, newDeltaBlockPostings(l, n), d.Err()
case 4:
l := d.Get()
return n, newBaseDeltaBlockPostings(l, n), d.Err()
return n, newBaseDeltaBlockPostings(l), d.Err()
case 5:
l := d.Get()
return n, newBitmapPostings(l), d.Err()
Expand Down
229 changes: 126 additions & 103 deletions index/postings.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
package index

import (
// "time"
// "fmt"
"container/heap"
"encoding/binary"
"math/bits"
Expand Down Expand Up @@ -693,7 +695,7 @@ func (it *bigEndianPostings) Err() error {
}

// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings, 6 is roaringBitmapPostings.
const postingsType = 2
const postingsType = 4

type bitSlice struct {
bstream []byte
Expand Down Expand Up @@ -812,7 +814,8 @@ func (it *baseDeltaPostings) Err() error {
return nil
}

const deltaBlockSize = 256
const deltaBlockSize = 32
const deltaBlockBits = 5

// Block format(delta is to the previous value).
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
Expand Down Expand Up @@ -970,71 +973,55 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
}

// Block format (each delta is relative to the block's base).
// ┌────────────────┬───────────────┬─────────────────┬────────────┬────────────────┬─────┬────────────────┐
// │ base <uvarint> │ idx <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bits> │ ... │ delta n <bits> │
// └────────────────┴───────────────┴─────────────────┴────────────┴────────────────┴─────┴────────────────┘
// ┌────────────────┬─────────────────┬────────────┬─────────────────┬─────┬─────────────────┐
// │ base <uvarint> │ count <uvarint> │ width <1b> │ delta 1 <bytes> │ ... │ delta n <bytes> │
// └────────────────┴─────────────────┴────────────┴─────────────────┴─────┴─────────────────┘
// baseDeltaBlockPostings iterates over postings stored in blocks, where each
// block holds a base value followed by deltas that are added to that base.
// NOTE(review): this listing comes from a diff view, so the field list below
// appears to merge fields of the old and the new version — confirm against
// the actual file.
type baseDeltaBlockPostings struct {
bs bitSlice // byte stream (bs.bstream) plus the delta width of the current block (bs.width).
size int // value passed to the two-argument constructor; Next compares idx against it.
count int // count in current block, as read from the block header.
idxBlock int // position within the current block; set to 1 once the block header has been read.
idx int // running index that Next checks against size.
offset int // read position in bs.bstream; originally commented "offset in bit", but some visible lines index bs.bstream with it directly — TODO confirm the unit.
cur uint64 // value returned by At.
base uint64 // base of the current block; each decoded delta is added to it.
mask uint32 // (1 << (8 * bs.width)) - 1; keeps the low bs.width bytes of a 4-byte big-endian read.
prel int // 4 - bs.width; how far a 4-byte read starts before offset so that it ends on the delta.
}

func newBaseDeltaBlockPostings(bstream []byte, size int) *baseDeltaBlockPostings {
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}, size: size}
}

func (it *baseDeltaBlockPostings) GetOff() int {
return it.offset
}
func (it *baseDeltaBlockPostings) GetWidth() int {
return it.bs.width
func newBaseDeltaBlockPostings(bstream []byte) *baseDeltaBlockPostings {
return &baseDeltaBlockPostings{bs: bitSlice{bstream: bstream}}
}

// At returns it.cur, the value most recently stored by Next or Seek.
func (it *baseDeltaBlockPostings) At() uint64 {
return it.cur
}

func (it *baseDeltaBlockPostings) Next() bool {
if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size {
if it.offset >= len(it.bs.bstream) {
return false
}
if it.offset%(deltaBlockSize<<3) == 0 {
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
if n < 1 {
return false
}
if it.offset%deltaBlockSize == 0 {
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
it.cur = val
it.base = val
it.offset += n << 3
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
if n < 1 {
return false
}
it.idx = int(val) + 1
it.offset += n << 3
val, n = binary.Uvarint(it.bs.bstream[it.offset>>3:])
if n < 1 {
return false
}
it.offset += n

val, n = binary.Uvarint(it.bs.bstream[it.offset:])
it.count = int(val)
it.offset += n << 3
it.bs.width = int(it.bs.bstream[it.offset>>3])
it.offset += 8
it.offset += n
it.bs.width = int(it.bs.bstream[it.offset])
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
it.prel = 4 - it.bs.width
it.offset += 1
it.idxBlock = 1
return true
}

it.cur = it.bs.readBits(it.offset) + it.base
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
it.offset += it.bs.width
it.idx += 1
it.idxBlock += 1
if it.idxBlock == it.count {
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
it.offset = (((it.offset-1)>>deltaBlockBits) + 1) << deltaBlockBits
}
return true
}
Expand All @@ -1043,69 +1030,109 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
if it.cur >= x {
return true
}

startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1
// Do binary search between current position and end.
i := sort.Search(num, func(i int) bool {
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
return val > x
})
if i > 0 {
// Go to the previous block because the previous block
// may contain the first value >= x.
i -= 1
if it.offset >= len(it.bs.bstream) {
return false
}
startOff := (((it.offset)>>deltaBlockBits)+1)<<deltaBlockBits
num := (len(it.bs.bstream)>>deltaBlockBits) - (startOff>>deltaBlockBits) + 1
if num > 0 {
// Fast path to check if the binary search among blocks is needed.
val, _ := binary.Uvarint(it.bs.bstream[startOff:])
if val <= x {
// Do binary search between current position and end.
i := sort.Search(num, func(i int) bool {
val, _ := binary.Uvarint(it.bs.bstream[startOff+(i<<deltaBlockBits):])
return val > x
})
if i > 0 {
// Go to the previous block because the previous block
// may contain the first value >= x.
i -= 1
}
it.offset = startOff + (i<<deltaBlockBits)

// Read base, and width.
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
it.cur = val
it.base = val
it.offset += n
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
it.count = int(val)
it.offset += n
it.bs.width = int(it.bs.bstream[it.offset])
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
it.prel = 4 - it.bs.width
it.offset += 1
it.idxBlock = 1
if x <= it.base {
return true
} else {
temp := x - it.base
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
})
if j < it.count-it.idxBlock {
it.offset += j * it.bs.width
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
it.idxBlock += j + 1
if it.idxBlock == it.count {
// it.offset = startOff + ((i+1)<<deltaBlockBits)
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
} else {
it.offset += it.bs.width
}
} else {
// it.offset = startOff + ((i+1)<<deltaBlockBits)
it.offset = ((startOff>>deltaBlockBits)+i+1)<<deltaBlockBits
return it.Next()
}
return true
}
}
}

if i == 0 && it.idx > 0 {
// Search in current block.
startOff -= deltaBlockSize
if it.offset == startOff {
// Read base, and width.
val, n := binary.Uvarint(it.bs.bstream[it.offset:])
it.cur = val
it.base = val
it.offset += n
val, n = binary.Uvarint(it.bs.bstream[it.offset:])
it.count = int(val)
it.offset += n
it.bs.width = int(it.bs.bstream[it.offset])
it.mask = (uint32(1) << uint(8 * it.bs.width)) - 1
it.prel = 4 - it.bs.width
it.offset += 1
it.idxBlock = 1
}
if x <= it.base {
return true
} else {
temp := x - it.base
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
return uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset+i*it.bs.width-it.prel:])&it.mask) >= temp
})

if j < it.count-it.idxBlock {
it.offset += j * it.bs.width
it.cur = it.bs.readBits(it.offset) + it.base
it.offset += it.bs.width
it.cur = uint64(binary.BigEndian.Uint32(it.bs.bstream[it.offset-it.prel:])&it.mask) + it.base
it.idxBlock += j + 1
it.idx += j + 1
if it.idxBlock == it.count {
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
// it.offset = startOff + deltaBlockSize
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
} else {
it.offset += it.bs.width
}
} else {
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
// it.offset = startOff + deltaBlockSize
it.offset = ((startOff>>deltaBlockBits)+1)<<deltaBlockBits
return it.Next()
}
return true
} else {
it.offset = (startOff + i*deltaBlockSize) << 3

// Read base, idx, and width.
it.Next()
if x <= it.base {
return true
} else {
temp := x - it.base
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
})

if j < it.count-it.idxBlock {
it.offset += j * it.bs.width
it.cur = it.bs.readBits(it.offset) + it.base
it.offset += it.bs.width
it.idxBlock += j + 1
it.idx += j + 1
if it.idxBlock == it.count {
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
}
} else {
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
return it.Next()
}
return true
}
}

}

func (it *baseDeltaBlockPostings) Err() error {
Expand All @@ -1121,16 +1148,18 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
var max int
for i < len(arr) {
e.PutUvarint32(arr[i]) // Put base.
e.PutUvarint64(uint64(i)) // Put idx.
remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3
remaining = deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1
deltas = deltas[:0]
base = arr[i]
max = -1
i += 1
for i < len(arr) {
delta := arr[i] - base
cur := bits.Len32(delta)
if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 {
cur := (bits.Len32(delta) + 7) >> 3
if cur == 0 {
cur = 1
}
if remaining-cur*(len(deltas)+1)-((bits.Len(uint(len(deltas)))>>3)+1) >= 0 {
deltas = append(deltas, delta)
max = cur
} else {
Expand All @@ -1140,28 +1169,22 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
}
e.PutUvarint64(uint64(len(deltas) + 1))
e.PutByte(byte(max))
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1) << 3
remaining -= ((bits.Len(uint(len(deltas))) >> 3) + 1)
for _, delta := range deltas {
e.PutBits(uint64(delta), max)
for j := max - 1; j >= 0; j-- {
e.B = append(e.B, byte((delta>>(8*uint(j))&0xff)))
}
remaining -= max
}

if i == len(arr) {
break
}

for remaining >= 64 {
e.PutBits(uint64(0), 64)
remaining -= 64
}

if remaining > 0 {
e.PutBits(uint64(0), remaining)
for remaining > 0 {
e.PutByte(0)
remaining -= 1
}
e.Count = 0

// There can be one more extra 0.
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
}
}

Expand Down
Loading

0 comments on commit b3f2b5e

Please sign in to comment.