-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat #9212: add ability to generate shard digests
- Loading branch information
Showing
8 changed files
with
529 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package tsm1 | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"math" | ||
"os" | ||
"path/filepath" | ||
"sort" | ||
) | ||
|
||
// DigestOptions restricts which data a digest covers.
type DigestOptions struct {
	// MinTime and MaxTime bound the time window. Blocks whose time range
	// lies entirely outside [MinTime, MaxTime] are left out of the digest.
	MinTime int64
	MaxTime int64

	// MinKey and MaxKey bound the series keys, compared bytewise. An empty
	// bound leaves that side of the key range open.
	MinKey []byte
	MaxKey []byte
}
|
||
// DigestWithOptions writes a digest of dir to w using options to filter by | ||
// time and key range. | ||
func DigestWithOptions(dir string, opts DigestOptions, w io.WriteCloser) error { | ||
if dir == "" { | ||
return fmt.Errorf("dir is required") | ||
} | ||
|
||
files, err := filepath.Glob(filepath.Join(dir, fmt.Sprintf("*.%s", TSMFileExtension))) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
readers := make([]*TSMReader, 0, len(files)) | ||
|
||
for _, fi := range files { | ||
f, err := os.Open(fi) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
r, err := NewTSMReader(f) | ||
if err != nil { | ||
return err | ||
} | ||
readers = append(readers, r) | ||
} | ||
|
||
ch := make([]chan seriesKey, 0, len(files)) | ||
for _, fi := range files { | ||
f, err := os.Open(fi) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
r, err := NewTSMReader(f) | ||
if err != nil { | ||
return err | ||
} | ||
defer r.Close() | ||
|
||
s := make(chan seriesKey) | ||
ch = append(ch, s) | ||
go func() { | ||
for i := 0; i < r.KeyCount(); i++ { | ||
key, typ := r.KeyAt(i) | ||
if len(opts.MinKey) > 0 && bytes.Compare(key, opts.MinKey) < 0 { | ||
continue | ||
} | ||
|
||
if len(opts.MaxKey) > 0 && bytes.Compare(key, opts.MaxKey) > 0 { | ||
continue | ||
} | ||
|
||
s <- seriesKey{key: key, typ: typ} | ||
} | ||
close(s) | ||
}() | ||
|
||
} | ||
|
||
dw, err := NewDigestWriter(w) | ||
if err != nil { | ||
return err | ||
} | ||
defer dw.Close() | ||
|
||
var n int | ||
for key := range merge(ch...) { | ||
|
||
ts := &DigestTimeSpan{} | ||
n++ | ||
kstr := string(key.key) | ||
|
||
for _, r := range readers { | ||
entries := r.Entries(key.key) | ||
for _, entry := range entries { | ||
crc, b, err := r.ReadBytes(&entry, nil) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
// Filter blocks that are outside the time filter. If they overlap, we | ||
// still include them. | ||
if entry.MaxTime < opts.MinTime || entry.MinTime > opts.MaxTime { | ||
continue | ||
} | ||
|
||
cnt := BlockCount(b) | ||
ts.Add(entry.MinTime, entry.MaxTime, cnt, crc) | ||
} | ||
} | ||
|
||
sort.Sort(ts) | ||
if err := dw.WriteTimeSpan(kstr, ts); err != nil { | ||
return err | ||
} | ||
} | ||
return dw.Close() | ||
} | ||
|
||
// Digest writes a digest of dir to w of a full shard dir. | ||
func Digest(dir string, w io.WriteCloser) error { | ||
return DigestWithOptions(dir, DigestOptions{ | ||
MinTime: math.MinInt64, | ||
MaxTime: math.MaxInt64, | ||
}, w) | ||
} | ||
|
||
type rwPair struct { | ||
r *TSMReader | ||
w TSMWriter | ||
outf *os.File | ||
} | ||
|
||
func (rw *rwPair) close() { | ||
rw.r.Close() | ||
rw.w.Close() | ||
rw.outf.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package tsm1 | ||
|
||
import ( | ||
"bufio" | ||
"compress/gzip" | ||
"encoding/binary" | ||
"io" | ||
) | ||
|
||
// DigestReader decodes the records of a shard digest. It embeds the
// io.ReadCloser the records are read from, so Read is promoted from that
// stream.
type DigestReader struct {
	io.ReadCloser
}
|
||
func NewDigestReader(r io.ReadCloser) (*DigestReader, error) { | ||
gr, err := gzip.NewReader(bufio.NewReader(r)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &DigestReader{ReadCloser: gr}, nil | ||
} | ||
|
||
func (w *DigestReader) ReadTimeSpan() (string, *DigestTimeSpan, error) { | ||
var n uint16 | ||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &n); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
b := make([]byte, n) | ||
if _, err := io.ReadFull(w.ReadCloser, b); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
var cnt uint32 | ||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &cnt); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
ts := &DigestTimeSpan{} | ||
for i := 0; i < int(cnt); i++ { | ||
var min, max int64 | ||
var crc uint32 | ||
|
||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &min); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &max); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &crc); err != nil { | ||
return "", nil, err | ||
} | ||
|
||
if err := binary.Read(w.ReadCloser, binary.BigEndian, &n); err != nil { | ||
return "", nil, err | ||
} | ||
ts.Add(min, max, int(n), crc) | ||
} | ||
|
||
return string(b), ts, nil | ||
} | ||
|
||
func (w *DigestReader) Close() error { | ||
return w.ReadCloser.Close() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
package tsm1 | ||
|
||
import (
	"compress/gzip"
	"encoding/binary"
	"fmt"
	"io"
	"math"
)
|
||
// writeFlushCloser is an io.WriteCloser whose buffered data can also be
// flushed on demand.
type writeFlushCloser interface {
	io.WriteCloser
	Flush() error
}
|
||
// DigestWriter allows for writing a digest of a shard. A digest is a condensed | ||
// representation of the contents of a shard. It can be scoped to one or more series | ||
// keys, ranges of times or sets of files. | ||
type DigestWriter struct { | ||
F writeFlushCloser | ||
} | ||
|
||
func NewDigestWriter(w io.WriteCloser) (*DigestWriter, error) { | ||
gw := gzip.NewWriter(w) | ||
return &DigestWriter{F: gw}, nil | ||
} | ||
|
||
func (w *DigestWriter) WriteTimeSpan(key string, t *DigestTimeSpan) error { | ||
if err := binary.Write(w.F, binary.BigEndian, uint16(len(key))); err != nil { | ||
return err | ||
} | ||
|
||
if _, err := w.F.Write([]byte(key)); err != nil { | ||
return err | ||
} | ||
|
||
if err := binary.Write(w.F, binary.BigEndian, uint32(t.Len())); err != nil { | ||
return err | ||
} | ||
|
||
for _, tr := range t.Ranges { | ||
if err := binary.Write(w.F, binary.BigEndian, tr.Min); err != nil { | ||
return err | ||
} | ||
|
||
if err := binary.Write(w.F, binary.BigEndian, tr.Max); err != nil { | ||
return err | ||
} | ||
|
||
if err := binary.Write(w.F, binary.BigEndian, tr.CRC); err != nil { | ||
return err | ||
} | ||
|
||
if err := binary.Write(w.F, binary.BigEndian, uint16(tr.N)); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (w *DigestWriter) Flush() error { | ||
return w.F.Flush() | ||
} | ||
|
||
func (w *DigestWriter) Close() error { | ||
if err := w.Flush(); err != nil { | ||
return err | ||
} | ||
return w.F.Close() | ||
} | ||
|
||
// DigestTimeSpan is the set of block time ranges recorded for one series key.
// It implements sort.Interface.
type DigestTimeSpan struct {
	Ranges []DigestTimeRange
}

// Len returns the number of ranges in the span.
func (t DigestTimeSpan) Len() int { return len(t.Ranges) }

// Swap exchanges the ranges at positions i and j.
func (t DigestTimeSpan) Swap(i, j int) { t.Ranges[i], t.Ranges[j] = t.Ranges[j], t.Ranges[i] }

// Less orders ranges by Min, breaking ties on Max, then N, then CRC. Because
// Add never stores two identical ranges, this is a total order: sorting
// yields the same sequence regardless of the order the ranges were added in.
func (t DigestTimeSpan) Less(i, j int) bool {
	a, b := t.Ranges[i], t.Ranges[j]
	if a.Min != b.Min {
		return a.Min < b.Min
	}
	if a.Max != b.Max {
		return a.Max < b.Max
	}
	if a.N != b.N {
		return a.N < b.N
	}
	return a.CRC < b.CRC
}

// Add appends the range described by min, max, n and crc to the span. A
// range equal in all four values to one already present is ignored.
func (t *DigestTimeSpan) Add(min, max int64, n int, crc uint32) {
	for _, v := range t.Ranges {
		if v.Min == min && v.Max == max && v.N == n && v.CRC == crc {
			return
		}
	}
	t.Ranges = append(t.Ranges, DigestTimeRange{Min: min, Max: max, N: n, CRC: crc})
}

// DigestTimeRange describes a single block inside a DigestTimeSpan: the
// bounds of its time range, a count N and the CRC of the block.
type DigestTimeRange struct {
	Min, Max int64
	N        int
	CRC      uint32
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.