From ae3eec0b4939a46b1696d81619e4d996a01ecd43 Mon Sep 17 00:00:00 2001
From: Bartek Plotka <bwplotka@gmail.com>
Date: Wed, 2 Jan 2019 17:56:22 +0000
Subject: [PATCH] Exposed helper methods for reading index bytes.

Changes:
* ReadSymbols, TOC and ReadOffsetTable are now public (used by Thanos).
* decbufXXX methods of Reader are now standalone functions (newDecbufXXX).
* More verbose errors.
* Removed unused crc32 field.
* Some var name changes to make it more verbose:
  * symbols (local variable in newReader) -> allocatedSymbols
  * symbols (Reader field) -> symbolsV1
  * symbolSlice (Reader field) -> symbolsV2
* Pre-calculate symbolsTableSize.
* Initialized symbols for Symbols() method with valid length.
* Added test for Symbols method.
* Made the Decoder's LookupSymbol field public. Kept Decoder public as it is useful as a helper from the index package.

Signed-off-by: Bartek Plotka <bwplotka@gmail.com>
---
 index/encoding_helpers.go |  56 +++++++
 index/index.go            | 340 ++++++++++++++++----------------------
 index/index_test.go       |  21 ++-
 3 files changed, 217 insertions(+), 200 deletions(-)

diff --git a/index/encoding_helpers.go b/index/encoding_helpers.go
index 602498f1..9104f1cb 100644
--- a/index/encoding_helpers.go
+++ b/index/encoding_helpers.go
@@ -18,6 +18,8 @@ import (
 	"hash"
 	"hash/crc32"
 	"unsafe"
+
+	"github.com/pkg/errors"
 )
 
 // enbuf is a helper type to populate a byte slice with various types.
@@ -86,6 +88,60 @@ type decbuf struct {
 	e error
 }
 
+// newDecbufAt returns a new decoding buffer. It expects the first 4 bytes
+// after offset to hold the big endian encoded content length, followed by the contents and the expected
+// checksum.
+func newDecbufAt(bs ByteSlice, off int) decbuf {
+	if bs.Len() < off+4 {
+		return decbuf{e: errInvalidSize}
+	}
+	b := bs.Range(off, off+4)
+	l := int(binary.BigEndian.Uint32(b))
+
+	if bs.Len() < off+4+l+4 {
+		return decbuf{e: errInvalidSize}
+	}
+
+	// Load bytes holding the contents plus a CRC32 checksum.
+	b = bs.Range(off+4, off+4+l+4)
+	dec := decbuf{b: b[:len(b)-4]}
+
+	if exp := binary.BigEndian.Uint32(b[len(b)-4:]); dec.crc32() != exp {
+		return decbuf{e: errInvalidChecksum}
+	}
+	return dec
+}
+
+// newDecbufUvarintAt returns a new decoding buffer. It expects the first bytes
+// after offset to hold the uvarint-encoded buffer's length, followed by the contents and the expected
+// checksum.
+func newDecbufUvarintAt(bs ByteSlice, off int) decbuf {
+	// We never have to access this method at the far end of the byte slice. Thus just checking
+	// against the MaxVarintLen32 is sufficient.
+	if bs.Len() < off+binary.MaxVarintLen32 {
+		return decbuf{e: errInvalidSize}
+	}
+	b := bs.Range(off, off+binary.MaxVarintLen32)
+
+	l, n := binary.Uvarint(b)
+	if n <= 0 || n > binary.MaxVarintLen32 {
+		return decbuf{e: errors.Errorf("invalid uvarint %d", n)}
+	}
+
+	if bs.Len() < off+n+int(l)+4 {
+		return decbuf{e: errInvalidSize}
+	}
+
+	// Load bytes holding the contents plus a CRC32 checksum.
+	b = bs.Range(off+n, off+n+int(l)+4)
+	dec := decbuf{b: b[:len(b)-4]}
+
+	if dec.crc32() != binary.BigEndian.Uint32(b[len(b)-4:]) {
+		return decbuf{e: errInvalidChecksum}
+	}
+	return dec
+}
+
 func (d *decbuf) uvarint() int      { return int(d.uvarint64()) }
 func (d *decbuf) uvarint32() uint32 { return uint32(d.uvarint64()) }
 func (d *decbuf) be32int() int      { return int(d.be32()) }
diff --git a/index/index.go b/index/index.go
index 6413a9fc..c50613ce 100644
--- a/index/index.go
+++ b/index/index.go
@@ -39,6 +39,8 @@ const (
 	indexFormatV1 = 1
 	indexFormatV2 = 2
 
+	indexHeaderLen = 5
+
 	labelNameSeperator = "\xff"
 )
 
@@ -108,7 +110,7 @@ type Writer struct {
 	fbuf *bufio.Writer
 	pos  uint64
 
-	toc   indexTOC
+	toc   TOC
 	stage indexWriterStage
 
 	// Reusable memory.
@@ -129,13 +131,42 @@ type Writer struct {
 	Version int
 }
 
-type indexTOC struct {
-	symbols           uint64
-	series            uint64
-	labelIndices      uint64
-	labelIndicesTable uint64
-	postings          uint64
-	postingsTable     uint64
+// TOC represents the index Table Of Contents, which states where each section of the index starts.
+type TOC struct {
+	Symbols           uint64
+	Series            uint64
+	LabelIndices      uint64
+	LabelIndicesTable uint64
+	Postings          uint64
+	PostingsTable     uint64
+}
+
+// NewTOCFromByteSlice returns the parsed TOC from the given index byte slice.
+func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) {
+	if bs.Len() < indexTOCLen {
+		return nil, errInvalidSize
+	}
+	b := bs.Range(bs.Len()-indexTOCLen, bs.Len())
+
+	expCRC := binary.BigEndian.Uint32(b[len(b)-4:])
+	d := decbuf{b: b[:len(b)-4]}
+
+	if d.crc32() != expCRC {
+		return nil, errors.Wrap(errInvalidChecksum, "read TOC")
+	}
+
+	if err := d.err(); err != nil {
+		return nil, err
+	}
+
+	return &TOC{
+		Symbols:           d.be64(),
+		Series:            d.be64(),
+		LabelIndices:      d.be64(),
+		LabelIndicesTable: d.be64(),
+		Postings:          d.be64(),
+		PostingsTable:     d.be64(),
+	}, nil
 }
 
 // NewWriter returns a new Writer to the given filename. It serializes data in format version 2.
@@ -223,22 +254,22 @@ func (w *Writer) ensureStage(s indexWriterStage) error {
 	// Mark start of sections in table of contents.
 	switch s {
 	case idxStageSymbols:
-		w.toc.symbols = w.pos
+		w.toc.Symbols = w.pos
 	case idxStageSeries:
-		w.toc.series = w.pos
+		w.toc.Series = w.pos
 
 	case idxStageLabelIndex:
-		w.toc.labelIndices = w.pos
+		w.toc.LabelIndices = w.pos
 
 	case idxStagePostings:
-		w.toc.postings = w.pos
+		w.toc.Postings = w.pos
 
 	case idxStageDone:
-		w.toc.labelIndicesTable = w.pos
+		w.toc.LabelIndicesTable = w.pos
 		if err := w.writeOffsetTable(w.labelIndexes); err != nil {
 			return err
 		}
-		w.toc.postingsTable = w.pos
+		w.toc.PostingsTable = w.pos
 		if err := w.writeOffsetTable(w.postings); err != nil {
 			return err
 		}
@@ -346,8 +377,6 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
 	}
 	sort.Strings(symbols)
 
-	const headerSize = 4
-
 	w.buf1.reset()
 	w.buf2.reset()
 
@@ -438,12 +467,12 @@ const indexTOCLen = 6*8 + 4
 func (w *Writer) writeTOC() error {
 	w.buf1.reset()
 
-	w.buf1.putBE64(w.toc.symbols)
-	w.buf1.putBE64(w.toc.series)
-	w.buf1.putBE64(w.toc.labelIndices)
-	w.buf1.putBE64(w.toc.labelIndicesTable)
-	w.buf1.putBE64(w.toc.postings)
-	w.buf1.putBE64(w.toc.postingsTable)
+	w.buf1.putBE64(w.toc.Symbols)
+	w.buf1.putBE64(w.toc.Series)
+	w.buf1.putBE64(w.toc.LabelIndices)
+	w.buf1.putBE64(w.toc.LabelIndicesTable)
+	w.buf1.putBE64(w.toc.Postings)
+	w.buf1.putBE64(w.toc.PostingsTable)
 
 	w.buf1.putHash(w.crc32)
 
@@ -535,15 +564,14 @@ type StringTuples interface {
 }
 
 type Reader struct {
-	// The underlying byte slice holding the encoded series data.
-	b   ByteSlice
-	toc indexTOC
+	b ByteSlice
 
 	// Close that releases the underlying resources of the byte slice.
 	c io.Closer
 
 	// Cached hashmaps of section offsets.
-	labels   map[string]uint64
+	labels map[string]uint64
+	// LabelName to LabelValue to offset map.
 	postings map[string]map[string]uint64
 	// Cache of read symbols. Strings that are returned when reading from the
 	// block are always backed by true strings held in here rather than
@@ -551,19 +579,17 @@ type Reader struct {
 	// prevents memory faults when applications work with read symbols after
 	// the block has been unmapped. The older format has sparse indexes so a map
 	// must be used, but the new format is not so we can use a slice.
-	symbols     map[uint32]string
-	symbolSlice []string
+	symbolsV1        map[uint32]string
+	symbolsV2        []string
+	symbolsTableSize uint64
 
 	dec *Decoder
 
-	crc32 hash.Hash32
-
 	version int
 }
 
 var (
 	errInvalidSize     = fmt.Errorf("invalid size")
-	errInvalidFlag     = fmt.Errorf("invalid flag")
 	errInvalidChecksum = fmt.Errorf("invalid checksum")
 )
 
@@ -587,7 +613,7 @@ func (b realByteSlice) Sub(start, end int) ByteSlice {
 	return b[start:end]
 }
 
-// NewReader returns a new IndexReader on the given byte slice. It automatically
+// NewReader returns a new index reader on the given byte slice. It automatically
 // handles different format versions.
 func NewReader(b ByteSlice) (*Reader, error) {
 	return newReader(b, nil)
@@ -606,14 +632,12 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
 	r := &Reader{
 		b:        b,
 		c:        c,
-		symbols:  map[uint32]string{},
 		labels:   map[string]uint64{},
 		postings: map[string]map[string]uint64{},
-		crc32:    newCRC32(),
 	}
 
 	// Verify header.
-	if b.Len() < 5 {
+	if r.b.Len() < indexHeaderLen {
 		return nil, errors.Wrap(errInvalidSize, "index header")
 	}
 	if m := binary.BigEndian.Uint32(r.b.Range(0, 4)); m != MagicIndex {
@@ -625,50 +649,55 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) {
 		return nil, errors.Errorf("unknown index file version %d", r.version)
 	}
 
-	if err := r.readTOC(); err != nil {
+	toc, err := NewTOCFromByteSlice(b)
+	if err != nil {
 		return nil, errors.Wrap(err, "read TOC")
 	}
-	if err := r.readSymbols(int(r.toc.symbols)); err != nil {
+
+	r.symbolsV2, r.symbolsV1, err = ReadSymbols(r.b, r.version, int(toc.Symbols))
+	if err != nil {
 		return nil, errors.Wrap(err, "read symbols")
 	}
-	var err error
 
 	// Use the strings already allocated by symbols, rather than
 	// re-allocating them again below.
-	symbols := make(map[string]string, len(r.symbols)+len(r.symbolSlice))
-	for _, s := range r.symbols {
-		symbols[s] = s
+	// Additionally, calculate symbolsTableSize.
+	allocatedSymbols := make(map[string]string, len(r.symbolsV1)+len(r.symbolsV2))
+	for _, s := range r.symbolsV1 {
+		r.symbolsTableSize += uint64(len(s) + 8)
+		allocatedSymbols[s] = s
 	}
-	for _, s := range r.symbolSlice {
-		symbols[s] = s
+	for _, s := range r.symbolsV2 {
+		r.symbolsTableSize += uint64(len(s) + 8)
+		allocatedSymbols[s] = s
 	}
 
-	err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error {
+	if err := ReadOffsetTable(r.b, toc.LabelIndicesTable, func(key []string, off uint64) error {
 		if len(key) != 1 {
-			return errors.Errorf("unexpected key length %d", len(key))
+			return errors.Errorf("unexpected key length for label indices table %d", len(key))
 		}
-		r.labels[symbols[key[0]]] = off
+
+		r.labels[allocatedSymbols[key[0]]] = off
 		return nil
-	})
-	if err != nil {
+	}); err != nil {
 		return nil, errors.Wrap(err, "read label index table")
 	}
+
 	r.postings[""] = map[string]uint64{}
-	err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint64) error {
+	if err := ReadOffsetTable(r.b, toc.PostingsTable, func(key []string, off uint64) error {
 		if len(key) != 2 {
-			return errors.Errorf("unexpected key length %d", len(key))
+			return errors.Errorf("unexpected key length for posting table %d", len(key))
 		}
 		if _, ok := r.postings[key[0]]; !ok {
-			r.postings[symbols[key[0]]] = map[string]uint64{}
+			r.postings[allocatedSymbols[key[0]]] = map[string]uint64{}
 		}
-		r.postings[key[0]][symbols[key[1]]] = off
+		r.postings[key[0]][allocatedSymbols[key[1]]] = off
 		return nil
-	})
-	if err != nil {
+	}); err != nil {
 		return nil, errors.Wrap(err, "read postings table")
 	}
 
-	r.dec = &Decoder{lookupSymbol: r.lookupSymbol}
+	r.dec = &Decoder{LookupSymbol: r.lookupSymbol}
 
 	return r, nil
 }
@@ -678,146 +707,45 @@ func (r *Reader) Version() int {
 	return r.version
 }
 
-// Range marks a byte range.
-type Range struct {
-	Start, End int64
-}
-
-// PostingsRanges returns a new map of byte range in the underlying index file
-// for all postings lists.
-func (r *Reader) PostingsRanges() (map[labels.Label]Range, error) {
-	m := map[labels.Label]Range{}
-
-	for k, e := range r.postings {
-		for v, start := range e {
-			d := r.decbufAt(int(start))
-			if d.err() != nil {
-				return nil, d.err()
-			}
-			m[labels.Label{Name: k, Value: v}] = Range{
-				Start: int64(start) + 4,
-				End:   int64(start) + 4 + int64(d.len()),
-			}
-		}
-	}
-	return m, nil
-}
-
-func (r *Reader) readTOC() error {
-	if r.b.Len() < indexTOCLen {
-		return errInvalidSize
-	}
-	b := r.b.Range(r.b.Len()-indexTOCLen, r.b.Len())
-
-	expCRC := binary.BigEndian.Uint32(b[len(b)-4:])
-	d := decbuf{b: b[:len(b)-4]}
-
-	if d.crc32() != expCRC {
-		return errors.Wrap(errInvalidChecksum, "read TOC")
-	}
-
-	r.toc.symbols = d.be64()
-	r.toc.series = d.be64()
-	r.toc.labelIndices = d.be64()
-	r.toc.labelIndicesTable = d.be64()
-	r.toc.postings = d.be64()
-	r.toc.postingsTable = d.be64()
-
-	return d.err()
-}
-
-// decbufAt returns a new decoding buffer. It expects the first 4 bytes
-// after offset to hold the big endian encoded content length, followed by the contents and the expected
-// checksum.
-func (r *Reader) decbufAt(off int) decbuf {
-	if r.b.Len() < off+4 {
-		return decbuf{e: errInvalidSize}
-	}
-	b := r.b.Range(off, off+4)
-	l := int(binary.BigEndian.Uint32(b))
-
-	if r.b.Len() < off+4+l+4 {
-		return decbuf{e: errInvalidSize}
-	}
-
-	// Load bytes holding the contents plus a CRC32 checksum.
-	b = r.b.Range(off+4, off+4+l+4)
-	dec := decbuf{b: b[:len(b)-4]}
-
-	if exp := binary.BigEndian.Uint32(b[len(b)-4:]); dec.crc32() != exp {
-		return decbuf{e: errInvalidChecksum}
-	}
-	return dec
-}
-
-// decbufUvarintAt returns a new decoding buffer. It expects the first bytes
-// after offset to hold the uvarint-encoded buffers length, followed by the contents and the expected
-// checksum.
-func (r *Reader) decbufUvarintAt(off int) decbuf {
-	// We never have to access this method at the far end of the byte slice. Thus just checking
-	// against the MaxVarintLen32 is sufficient.
-	if r.b.Len() < off+binary.MaxVarintLen32 {
-		return decbuf{e: errInvalidSize}
-	}
-	b := r.b.Range(off, off+binary.MaxVarintLen32)
-
-	l, n := binary.Uvarint(b)
-	if n <= 0 || n > binary.MaxVarintLen32 {
-		return decbuf{e: errors.Errorf("invalid uvarint %d", n)}
-	}
-
-	if r.b.Len() < off+n+int(l)+4 {
-		return decbuf{e: errInvalidSize}
-	}
-
-	// Load bytes holding the contents plus a CRC32 checksum.
-	b = r.b.Range(off+n, off+n+int(l)+4)
-	dec := decbuf{b: b[:len(b)-4]}
-
-	if dec.crc32() != binary.BigEndian.Uint32(b[len(b)-4:]) {
-		return decbuf{e: errInvalidChecksum}
-	}
-	return dec
-}
-
-// readSymbols reads the symbol table fully into memory and allocates proper strings for them.
+// ReadSymbols reads the symbol table fully into memory and allocates proper strings for them.
 // Strings backed by the mmap'd memory would cause memory faults if applications keep using them
 // after the reader is closed.
-func (r *Reader) readSymbols(off int) error {
+func ReadSymbols(bs ByteSlice, version int, off int) ([]string, map[uint32]string, error) {
 	if off == 0 {
-		return nil
+		return nil, nil, nil
 	}
-	d := r.decbufAt(off)
+	d := newDecbufAt(bs, off)
 
 	var (
-		origLen = d.len()
-		cnt     = d.be32int()
-		basePos = uint32(off) + 4
-		nextPos = basePos + uint32(origLen-d.len())
+		origLen     = d.len()
+		cnt         = d.be32int()
+		basePos     = uint32(off) + 4
+		nextPos     = basePos + uint32(origLen-d.len())
+		symbolSlice []string
+		symbols     = map[uint32]string{}
 	)
-	if r.version == 2 {
-		r.symbolSlice = make([]string, 0, cnt)
+	if version == 2 {
+		symbolSlice = make([]string, 0, cnt)
 	}
 
 	for d.err() == nil && d.len() > 0 && cnt > 0 {
 		s := d.uvarintStr()
 
-		if r.version == 2 {
-			r.symbolSlice = append(r.symbolSlice, s)
+		if version == 2 {
+			symbolSlice = append(symbolSlice, s)
 		} else {
-			r.symbols[nextPos] = s
+			symbols[nextPos] = s
 			nextPos = basePos + uint32(origLen-d.len())
 		}
 		cnt--
 	}
-	return errors.Wrap(d.err(), "read symbols")
+	return symbolSlice, symbols, errors.Wrap(d.err(), "read symbols")
 }
 
-// readOffsetTable reads an offset table at the given position calls f for each
-// found entry.f
-// If f returns an error it stops decoding and returns the received error,
-func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) error {
-	d := r.decbufAt(int(off))
+// ReadOffsetTable reads an offset table at the given position and calls f for each
+// found entry. If f returns an error it stops decoding and returns the received error.
+func ReadOffsetTable(bs ByteSlice, off uint64, f func([]string, uint64) error) error {
+	d := newDecbufAt(bs, int(off))
 	cnt := d.be32()
 
 	for d.err() == nil && d.len() > 0 && cnt > 0 {
@@ -839,16 +767,41 @@ func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) err
 	return d.err()
 }
 
+// Range marks a byte range.
+type Range struct {
+	Start, End int64
+}
+
+// PostingsRanges returns a new map of byte range in the underlying index file
+// for all postings lists.
+func (r *Reader) PostingsRanges() (map[labels.Label]Range, error) {
+	m := map[labels.Label]Range{}
+
+	for k, e := range r.postings {
+		for v, start := range e {
+			d := newDecbufAt(r.b, int(start))
+			if d.err() != nil {
+				return nil, d.err()
+			}
+			m[labels.Label{Name: k, Value: v}] = Range{
+				Start: int64(start) + 4,
+				End:   int64(start) + 4 + int64(d.len()),
+			}
+		}
+	}
+	return m, nil
+}
+
 // Close the reader and its underlying resources.
 func (r *Reader) Close() error {
 	return r.c.Close()
 }
 
 func (r *Reader) lookupSymbol(o uint32) (string, error) {
-	if int(o) < len(r.symbolSlice) {
-		return r.symbolSlice[o], nil
+	if int(o) < len(r.symbolsV2) {
+		return r.symbolsV2[o], nil
 	}
-	s, ok := r.symbols[o]
+	s, ok := r.symbolsV1[o]
 	if !ok {
 		return "", errors.Errorf("unknown symbol offset %d", o)
 	}
@@ -857,12 +810,12 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
 
 // Symbols returns a set of symbols that exist within the index.
 func (r *Reader) Symbols() (map[string]struct{}, error) {
-	res := make(map[string]struct{}, len(r.symbols))
+	res := make(map[string]struct{}, len(r.symbolsV1)+len(r.symbolsV2))
 
-	for _, s := range r.symbols {
+	for _, s := range r.symbolsV1 {
 		res[s] = struct{}{}
 	}
-	for _, s := range r.symbolSlice {
+	for _, s := range r.symbolsV2 {
 		res[s] = struct{}{}
 	}
 	return res, nil
@@ -870,14 +823,7 @@ func (r *Reader) Symbols() (map[string]struct{}, error) {
 
 // SymbolTableSize returns the symbol table that is used to resolve symbol references.
 func (r *Reader) SymbolTableSize() uint64 {
-	var size int
-	for _, s := range r.symbols {
-		size += len(s) + 8
-	}
-	for _, s := range r.symbolSlice {
-		size += len(s) + 8
-	}
-	return uint64(size)
+	return r.symbolsTableSize
 }
 
 // LabelValues returns value tuples that exist for the given label name tuples.
@@ -892,7 +838,7 @@ func (r *Reader) LabelValues(names ...string) (StringTuples, error) {
 		//return nil, fmt.Errorf("label index doesn't exist")
 	}
 
-	d := r.decbufAt(int(off))
+	d := newDecbufAt(r.b, int(off))
 
 	nc := d.be32int()
 	d.be32() // consume unused value entry count.
@@ -916,7 +862,7 @@ func (emptyStringTuples) Len() int                   { return 0 }
 // LabelIndices returns a slice of label names for which labels or label tuples value indices exist.
 // NOTE: This is deprecated. Use `LabelNames()` instead.
 func (r *Reader) LabelIndices() ([][]string, error) {
-	res := [][]string{}
+	var res [][]string
 	for s := range r.labels {
 		res = append(res, strings.Split(s, labelNameSeperator))
 	}
@@ -931,7 +877,7 @@ func (r *Reader) Series(id uint64, lbls *labels.Labels, chks *[]chunks.Meta) err
 	if r.version == 2 {
 		offset = id * 16
 	}
-	d := r.decbufUvarintAt(int(offset))
+	d := newDecbufUvarintAt(r.b, int(offset))
 	if d.err() != nil {
 		return d.err()
 	}
@@ -948,7 +894,7 @@ func (r *Reader) Postings(name, value string) (Postings, error) {
 	if !ok {
 		return EmptyPostings(), nil
 	}
-	d := r.decbufAt(int(off))
+	d := newDecbufAt(r.b, int(off))
 	if d.err() != nil {
 		return nil, errors.Wrap(d.err(), "get postings entry")
 	}
@@ -1062,7 +1008,7 @@ func (t *serializedStringTuples) At(i int) ([]string, error) {
 // It currently does not contain decoding methods for all entry types but can be extended
 // by them if there's demand.
 type Decoder struct {
-	lookupSymbol func(uint32) (string, error)
+	LookupSymbol func(uint32) (string, error)
 }
 
 // Postings returns a postings list for b and its number of elements.
@@ -1090,11 +1036,11 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]chunks.Meta) e
 			return errors.Wrap(d.err(), "read series label offsets")
 		}
 
-		ln, err := dec.lookupSymbol(lno)
+		ln, err := dec.LookupSymbol(lno)
 		if err != nil {
 			return errors.Wrap(err, "lookup label name")
 		}
-		lv, err := dec.lookupSymbol(lvo)
+		lv, err := dec.LookupSymbol(lvo)
 		if err != nil {
 			return errors.Wrap(err, "lookup label value")
 		}
diff --git a/index/index_test.go b/index/index_test.go
index f7a81562..ebb53dc5 100644
--- a/index/index_test.go
+++ b/index/index_test.go
@@ -378,13 +378,28 @@ func TestPersistence_index_e2e(t *testing.T) {
 		}
 	}
 
+	gotSymbols, err := ir.Symbols()
+	testutil.Ok(t, err)
+
+	testutil.Equals(t, len(mi.symbols), len(gotSymbols))
+	for s := range mi.symbols {
+		_, ok := gotSymbols[s]
+		testutil.Assert(t, ok, "")
+	}
+
 	testutil.Ok(t, ir.Close())
 }
 
-func TestReaderWithInvalidBuffer(t *testing.T) {
+func TestDecbufUvariantWithInvalidBuffer(t *testing.T) {
 	b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
-	r := &Reader{b: b}
 
-	db := r.decbufUvarintAt(0)
+	db := newDecbufUvarintAt(b, 0)
 	testutil.NotOk(t, db.err())
 }
+
+func TestReaderWithInvalidBuffer(t *testing.T) {
+	b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81})
+
+	_, err := NewReader(b)
+	testutil.NotOk(t, err)
+}