Skip to content

Commit

Permalink
Batch oriented timestamp encoders
Browse files Browse the repository at this point in the history
This commit adds a tsm1 function for encoding a batch of timestamps into a
provided buffer.

The following benchmarks compare the performance of the existing
iterator based encoders, and the new batch oriented encoders. They look
at a sequential input slice, a randomly generated input slice and a
duplicate slice. All slices are sorted.

name                       old time/op    new time/op    delta
EncodeTimestamps/10_seq       153ns ± 2%     104ns ± 2%  -31.62%  (p=0.000 n=9+10)
EncodeTimestamps/10_ran       191ns ± 2%     142ns ± 0%  -25.73%  (p=0.000 n=10+9)
EncodeTimestamps/10_dup       114ns ± 1%      68ns ± 4%  -39.77%  (p=0.000 n=8+10)
EncodeTimestamps/100_seq      704ns ± 2%     321ns ± 2%  -54.44%  (p=0.000 n=9+9)
EncodeTimestamps/100_ran     7.27µs ± 4%    7.01µs ± 2%   -3.59%  (p=0.000 n=10+10)
EncodeTimestamps/100_dup      756ns ± 3%     396ns ± 2%  -47.57%  (p=0.000 n=10+10)
EncodeTimestamps/1000_seq    6.32µs ± 1%    2.46µs ± 2%  -61.01%  (p=0.000 n=8+10)
EncodeTimestamps/1000_ran     108µs ± 0%      68µs ± 3%  -37.57%  (p=0.000 n=8+10)
EncodeTimestamps/1000_dup    7.26µs ± 1%    3.64µs ± 1%  -49.80%  (p=0.000 n=10+8)

name                       old alloc/op   new alloc/op   delta
EncodeTimestamps/10_seq       0.00B          0.00B          ~     (all equal)
EncodeTimestamps/10_ran       0.00B          0.00B          ~     (all equal)
EncodeTimestamps/10_dup       0.00B          0.00B          ~     (all equal)
EncodeTimestamps/100_seq      0.00B          0.00B          ~     (all equal)
EncodeTimestamps/100_ran      0.00B          0.00B          ~     (all equal)
EncodeTimestamps/100_dup      0.00B          0.00B          ~     (all equal)
EncodeTimestamps/1000_seq     0.00B          0.00B          ~     (all equal)
EncodeTimestamps/1000_ran     0.00B          0.00B          ~     (all equal)
EncodeTimestamps/1000_dup     0.00B          0.00B          ~     (all equal)

name                       old allocs/op  new allocs/op  delta
EncodeTimestamps/10_seq        0.00           0.00          ~     (all equal)
EncodeTimestamps/10_ran        0.00           0.00          ~     (all equal)
EncodeTimestamps/10_dup        0.00           0.00          ~     (all equal)
EncodeTimestamps/100_seq       0.00           0.00          ~     (all equal)
EncodeTimestamps/100_ran       0.00           0.00          ~     (all equal)
EncodeTimestamps/100_dup       0.00           0.00          ~     (all equal)
EncodeTimestamps/1000_seq      0.00           0.00          ~     (all equal)
EncodeTimestamps/1000_ran      0.00           0.00          ~     (all equal)
EncodeTimestamps/1000_dup      0.00           0.00          ~     (all equal)
  • Loading branch information
e-dard committed Sep 21, 2018
1 parent 5a5ffe6 commit 5decd99
Show file tree
Hide file tree
Showing 2 changed files with 802 additions and 0 deletions.
122 changes: 122 additions & 0 deletions tsdb/engine/tsm1/batch_timestamp.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,128 @@ import (
"github.com/influxdata/influxdb/pkg/encoding/simple8b"
)

// TimeArrayEncodeAll encodes the timestamps in src into b and returns the
// encoded bytes along with any error encountered. The returned slice may have
// a different length and capacity to b. An empty src yields a nil slice and a
// nil error.
//
// It provides batch oriented versions of the three integer encodings we
// support: uncompressed, simple8b and RLE.
//
// The timestamps should be sorted before they are encoded. Encoding starts by
// delta-encoding the input: the first value is kept as the starting timestamp
// and every subsequent value becomes the difference from the value before it.
//
// Important: TimeArrayEncodeAll overwrites the contents of src, using it as
// scratch space for the delta encoded values. It is NOT SAFE to use src after
// passing it into TimeArrayEncodeAll.
func TimeArrayEncodeAll(src []int64, b []byte) ([]byte, error) {
	if len(src) == 0 {
		return nil, nil // Nothing to encode.
	}

	// Reuse src as the storage for the deltas so that encoding a block does
	// not need a second slice the size of the input.
	deltas := reintepretInt64ToUint64Slice(src)
	last := len(deltas) - 1

	var (
		// sameDelta stays true while every delta seen so far is identical.
		sameDelta = true
		// largest is the biggest delta seen.
		largest = uint64(0)
		// divisor is the largest power of ten (capped at 1e12) that evenly
		// divides every delta seen.
		divisor = uint64(1e12)
	)

	// Walk backwards so that each delta is computed from a predecessor that
	// has not been overwritten yet.
	for i := last; i > 0; i-- {
		d := deltas[i] - deltas[i-1]
		deltas[i] = d

		if d > largest {
			largest = d
		}

		// Shrink the divisor until it divides this delta evenly or reaches 1.
		for divisor > 1 {
			if d%divisor == 0 {
				break
			}
			divisor /= 10
		}

		// The deltas can only be run-length encoded if they are all equal.
		// The final delta is visited first and has no successor to compare.
		if i != last && deltas[i+1] != d {
			sameDelta = false
		}
	}

	// Every delta is identical - encode with RLE.
	if sameDelta && len(deltas) > 1 {
		// Worst case is 1 type byte + 8 bytes for the first value + two
		// uvarints of at most 10 bytes each (29 bytes); reserve 31.
		const rleSize = 31
		if len(b) < rleSize {
			if cap(b) >= rleSize {
				b = b[:rleSize]
			} else {
				b = append(b, make([]byte, rleSize-len(b))...)
			}
		}

		// High nibble: encoding type. Low nibble: log10 of the divisor.
		b[0] = byte(timeCompressedRLE)<<4 | byte(math.Log10(float64(divisor)))

		// The starting timestamp, stored in full.
		binary.BigEndian.PutUint64(b[1:9], deltas[0])
		n := 9
		// The (scaled) delta shared by every value.
		n += binary.PutUvarint(b[n:], deltas[1]/divisor)
		// The number of values in the run.
		n += binary.PutUvarint(b[n:], uint64(len(deltas)))

		return b[:n], nil
	}

	// A delta is larger than simple8b can represent, so the block cannot be
	// compressed - store every value using 8 bytes.
	if largest > simple8b.MaxValue {
		sz := 1 + len(deltas)*8
		if len(b) < sz {
			if cap(b) >= sz {
				b = b[:sz]
			} else {
				b = append(b, make([]byte, sz-len(b))...)
			}
		}

		// High nibble of the first byte stores the encoding type.
		b[0] = byte(timeUncompressed) << 4

		off := 1
		for _, d := range deltas {
			binary.BigEndian.PutUint64(b[off:off+8], d)
			off += 8
		}
		return b[:sz], nil
	}

	// Scale the deltas down by the common divisor. Division is expensive, so
	// skip the pass entirely when the divisor is 1.
	rest := deltas[1:]
	if divisor > 1 {
		for i := range rest {
			rest[i] /= divisor
		}
	}

	// Pack the deltas with simple8b. The first value is not part of the packed
	// data; it is written unencoded using 8 bytes.
	words, err := simple8b.EncodeAll(rest)
	if err != nil {
		return nil, err
	}

	// 1 type byte + 8 bytes for the first value + 8 bytes per packed word.
	sz := 9 + len(words)*8
	if len(b) < sz {
		if cap(b) >= sz {
			b = b[:sz]
		} else {
			b = append(b, make([]byte, sz-len(b))...)
		}
	}

	// High nibble: encoding type. Low nibble: log10 of the divisor.
	b[0] = byte(timeCompressedPackedSimple)<<4 | byte(math.Log10(float64(divisor)))

	// The starting timestamp, stored in full.
	binary.BigEndian.PutUint64(b[1:9], deltas[0])

	// The packed words follow the starting timestamp.
	off := 9
	for _, w := range words {
		binary.BigEndian.PutUint64(b[off:off+8], w)
		off += 8
	}
	return b[:sz], nil
}

var (
timeBatchDecoderFunc = [...]func(b []byte, dst []int64) ([]int64, error){
timeBatchDecodeAllUncompressed,
Expand Down
Loading

0 comments on commit 5decd99

Please sign in to comment.