From e3ebaa4018d88189a96f99e4659b1bfa2e3776b5 Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Wed, 20 Dec 2023 12:24:43 -0500 Subject: [PATCH] Fix "zed dev vcache copy" Do that by plugging some holes left by #4925. * In runtime/vcache/ztests/, re-enable skipped "zed dev vcache copy" tests. * In cmd/zed/dev/vcache/copy.Command.Run, resurrect the call to zio.Copy. * In package runtime/vcache: * Resurrect the Reader type and update its Read method. * Resurrect Object.NewReader. * Add loader type to hold the *zed.Context needed when loading a vector of type values. * Convert existing loadXXX functions to loader methods. * In loadArray, fix bad nil argument to loadVector. * In loadMap, load lengths vector. * In loadNulls, create slots slice and load values vector. * In loadPrimitive, remove null handling (vector.Nulls now does that) and add support for missing Zed types (float16, float32, float64, bytes, ip, net, and type). * In loadRecord, load all fields when no specific path is requested. * In loadUnion, load tags vector. * In loadVector, assign to *any when loading a constant vector. * In package vector: * Add missing Byte, Float, IP, Net, and Type types. * Add missing length field to Map type. * Add missing tags field to Union type. * Implement NewBuilder method for Array, Map, and Union types. * Handle nulls for all vector types by replacing the Nullmask type with Nulls, which wraps a vector.Any. 
--- cmd/zed/dev/vcache/copy/command.go | 11 +- runtime/vcache/array.go | 11 +- runtime/vcache/map.go | 14 +- runtime/vcache/nulls.go | 26 ++-- runtime/vcache/object.go | 10 +- runtime/vcache/primitive.go | 131 +++++++----------- runtime/vcache/reader.go | 64 ++++----- runtime/vcache/record.go | 10 +- runtime/vcache/union.go | 11 +- runtime/vcache/vector.go | 26 ++-- runtime/vcache/ztests/array-copy.yaml | 2 - runtime/vcache/ztests/map-copy.yaml | 2 - runtime/vcache/ztests/named-copy.yaml | 2 - .../vcache/ztests/primitive-const-copy.yaml | 33 +++++ runtime/vcache/ztests/primitive-copy.yaml | 53 +++++++ runtime/vcache/ztests/record-copy.yaml | 2 - runtime/vcache/ztests/set-copy.yaml | 2 - runtime/vcache/ztests/union-copy.yaml | 2 - vector/array.go | 26 +++- vector/bool.go | 12 +- vector/bytes.go | 34 +++++ vector/float.go | 44 ++++++ vector/int.go | 13 +- vector/ip.go | 36 +++++ vector/map.go | 35 ++++- vector/net.go | 36 +++++ vector/nulls.go | 53 +++++-- vector/string.go | 12 +- vector/type.go | 34 +++++ vector/uint.go | 13 +- vector/union.go | 22 ++- 31 files changed, 556 insertions(+), 226 deletions(-) create mode 100644 runtime/vcache/ztests/primitive-const-copy.yaml create mode 100644 runtime/vcache/ztests/primitive-copy.yaml create mode 100644 vector/bytes.go create mode 100644 vector/float.go create mode 100644 vector/ip.go create mode 100644 vector/net.go create mode 100644 vector/type.go diff --git a/cmd/zed/dev/vcache/copy/command.go b/cmd/zed/dev/vcache/copy/command.go index 9d65d99a54..3c1c7960f9 100644 --- a/cmd/zed/dev/vcache/copy/command.go +++ b/cmd/zed/dev/vcache/copy/command.go @@ -10,6 +10,7 @@ import ( "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/pkg/storage" "github.com/brimdata/zed/runtime/vcache" + "github.com/brimdata/zed/zio" "github.com/segmentio/ksuid" ) @@ -66,11 +67,9 @@ func (c *Command) Run(args []string) error { if err != nil { return err } - /* - if err := zio.Copy(writer, object.NewReader()); err != nil { - 
writer.Close() - return err - } - */ + if err := zio.Copy(writer, object.NewReader()); err != nil { + writer.Close() + return err + } return writer.Close() } diff --git a/runtime/vcache/array.go b/runtime/vcache/array.go index 1e8a11e8ad..bf9816a6cc 100644 --- a/runtime/vcache/array.go +++ b/runtime/vcache/array.go @@ -2,7 +2,6 @@ package vcache import ( "fmt" - "io" "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" @@ -11,7 +10,7 @@ import ( meta "github.com/brimdata/zed/vng/vector" //XXX rename package ) -func loadArray(any *vector.Any, typ zed.Type, path field.Path, m *meta.Array, r io.ReaderAt) (*vector.Array, error) { +func (l *loader) loadArray(any *vector.Any, typ zed.Type, path field.Path, m *meta.Array) (*vector.Array, error) { if *any == nil { var innerType zed.Type switch typ := typ.(type) { @@ -22,15 +21,15 @@ func loadArray(any *vector.Any, typ zed.Type, path field.Path, m *meta.Array, r default: return nil, fmt.Errorf("internal error: vcache.loadArray encountered bad type: %s", typ) } - lengths, err := vng.ReadIntVector(m.Lengths, r) + lengths, err := vng.ReadIntVector(m.Lengths, l.r) if err != nil { return nil, err } - values, err := loadVector(nil, innerType, path, m.Values, r) - if err != nil { + var values vector.Any + if _, err := l.loadVector(&values, innerType, path, m.Values); err != nil { return nil, err } - *any = vector.NewArray(typ.(*zed.TypeArray), lengths, values) + *any = vector.NewArray(typ, lengths, values) } //XXX always return the array as the vector engine needs to know how to handle // manipulating the array no matter what it contains diff --git a/runtime/vcache/map.go b/runtime/vcache/map.go index 7a124554ae..a44f92008c 100644 --- a/runtime/vcache/map.go +++ b/runtime/vcache/map.go @@ -2,30 +2,34 @@ package vcache import ( "fmt" - "io" "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" "github.com/brimdata/zed/vector" + "github.com/brimdata/zed/vng" meta "github.com/brimdata/zed/vng/vector" ) -func 
loadMap(any *vector.Any, typ zed.Type, path field.Path, m *meta.Map, r io.ReaderAt) (*vector.Map, error) { +func (l *loader) loadMap(any *vector.Any, typ zed.Type, path field.Path, m *meta.Map) (*vector.Map, error) { if *any == nil { mapType, ok := typ.(*zed.TypeMap) if !ok { return nil, fmt.Errorf("internal error: vcache.loadMap encountered bad type: %s", typ) } + lengths, err := vng.ReadIntVector(m.Lengths, l.r) + if err != nil { + return nil, err + } var keys, values vector.Any - _, err := loadVector(&keys, mapType.KeyType, path, m.Keys, r) + _, err = l.loadVector(&keys, mapType.KeyType, path, m.Keys) if err != nil { return nil, err } - _, err = loadVector(&values, mapType.ValType, path, m.Values, r) + _, err = l.loadVector(&values, mapType.ValType, path, m.Values) if err != nil { return nil, err } - *any = vector.NewMap(mapType, keys, values) + *any = vector.NewMap(mapType, lengths, keys, values) } return (*any).(*vector.Map), nil } diff --git a/runtime/vcache/nulls.go b/runtime/vcache/nulls.go index 9f44e01cf5..6440f15825 100644 --- a/runtime/vcache/nulls.go +++ b/runtime/vcache/nulls.go @@ -9,18 +9,16 @@ import ( meta "github.com/brimdata/zed/vng/vector" ) -func loadNulls(any *vector.Any, typ zed.Type, path field.Path, m *meta.Nulls, r io.ReaderAt) (vector.Any, error) { +func (l *loader) loadNulls(any *vector.Any, typ zed.Type, path field.Path, m *meta.Nulls) (vector.Any, error) { // The runlengths are typically small so we load them with the metadata // and don't bother waiting for a reference. - runlens := meta.NewInt64Reader(m.Runs, r) //XXX 32-bit reader? - var off, nulls uint32 - null := true - //XXX finish this loop... need to remove slots covered by nulls and subtract - // cumulative number of nulls for each surviving value slot. + runlens := meta.NewInt64Reader(m.Runs, l.r) //XXX 32-bit reader? 
+ var null bool + var off int + var slots []uint32 // In zed, nulls are generally bad and not really needed because we don't // need super-wide uber schemas with lots of nulls. for { - //XXX need nullslots array to build vector.Nullmask and need a way to pass down Nullmask XXX run, err := runlens.Read() if err != nil { if err == io.EOF { @@ -28,12 +26,18 @@ func loadNulls(any *vector.Any, typ zed.Type, path field.Path, m *meta.Nulls, r } return nil, err } - off += uint32(run) if null { - nulls += uint32(run) + for i := 0; int64(i) < run; i++ { + slots = append(slots, uint32(off+i)) + } } + off += int(run) null = !null } - //newSlots := slots //XXX need to create this above - return loadVector(any, typ, path, m.Values, r) + var values vector.Any + if _, err := l.loadVector(&values, typ, path, m.Values); err != nil { + return nil, err + } + *any = vector.NewNulls(slots, off, values) + return *any, nil } diff --git a/runtime/vcache/object.go b/runtime/vcache/object.go index e6364986e2..228c882544 100644 --- a/runtime/vcache/object.go +++ b/runtime/vcache/object.go @@ -135,7 +135,15 @@ func (o *Object) Len() int { // types in the hiearchy). Load returns a Group for each type and the Group // may contain multiple vectors. 
func (o *Object) Load(typeKey uint32, path field.Path) (vector.Any, error) { + l := loader{o.local, o.reader} o.mu[typeKey].Lock() defer o.mu[typeKey].Unlock() - return loadVector(&o.vectors[typeKey], o.typeDict[typeKey], path, o.metas[typeKey], o.reader) + return l.loadVector(&o.vectors[typeKey], o.typeDict[typeKey], path, o.metas[typeKey]) +} + +func (o *Object) NewReader() *Reader { + return &Reader{ + object: o, + builders: make([]vector.Builder, len(o.vectors)), + } } diff --git a/runtime/vcache/primitive.go b/runtime/vcache/primitive.go index e408d0ada7..f69aecab46 100644 --- a/runtime/vcache/primitive.go +++ b/runtime/vcache/primitive.go @@ -2,7 +2,7 @@ package vcache import ( "fmt" - "io" + "net/netip" "github.com/brimdata/zed" "github.com/brimdata/zed/vector" @@ -10,7 +10,7 @@ import ( "github.com/brimdata/zed/zcode" ) -func loadPrimitive(typ zed.Type, m *meta.Primitive, r io.ReaderAt) (vector.Any, error) { +func (l *loader) loadPrimitive(typ zed.Type, m *meta.Primitive) (vector.Any, error) { // The VNG primitive columns are stored as one big // list of Zed values. 
So we can just read the data in // all at once, compute the byte offsets of each value @@ -22,7 +22,7 @@ func loadPrimitive(typ zed.Type, m *meta.Primitive, r io.ReaderAt) (vector.Any, bytes := make([]byte, n) var off int for _, segment := range m.Segmap { - if err := segment.Read(r, bytes[off:]); err != nil { + if err := segment.Read(l.r, bytes[off:]); err != nil { return nil, err } off += int(segment.MemLength) @@ -34,103 +34,70 @@ func loadPrimitive(typ zed.Type, m *meta.Primitive, r io.ReaderAt) (vector.Any, } bytes = b } + it := zcode.Iter(bytes) switch typ := typ.(type) { case *zed.TypeOfUint8, *zed.TypeOfUint16, *zed.TypeOfUint32, *zed.TypeOfUint64: - //XXX put valcnt in vng meta and use vector allocator - var vals []uint64 - var nullslots []uint32 - it := zcode.Bytes(bytes).Iter() + var values []uint64 for !it.Done() { - val := it.Next() - if val == nil { - nullslots = append(nullslots, uint32(len(vals))) - vals = append(vals, 0) - } else { - vals = append(vals, zed.DecodeUint(val)) - } + values = append(values, zed.DecodeUint(it.Next())) } - return vector.NewUint(typ, vals, vector.NewNullmask(nullslots, len(vals))), nil + return vector.NewUint(typ, values), nil case *zed.TypeOfInt8, *zed.TypeOfInt16, *zed.TypeOfInt32, *zed.TypeOfInt64, *zed.TypeOfDuration, *zed.TypeOfTime: - //XXX put valcnt in vng meta and use vector allocator - var vals []int64 - var nullslots []uint32 - it := zcode.Bytes(bytes).Iter() + var values []int64 for !it.Done() { - val := it.Next() - if val == nil { - nullslots = append(nullslots, uint32(len(vals))) - vals = append(vals, 0) - } else { - vals = append(vals, zed.DecodeInt(val)) - } + values = append(values, zed.DecodeInt(it.Next())) } - return vector.NewInt(typ, vals, vector.NewNullmask(nullslots, len(vals))), nil - case *zed.TypeOfFloat16: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) - case *zed.TypeOfFloat32: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) - case *zed.TypeOfFloat64: - 
return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) + return vector.NewInt(typ, values), nil + case *zed.TypeOfFloat16, *zed.TypeOfFloat32, *zed.TypeOfFloat64: + var values []float64 + for !it.Done() { + values = append(values, zed.DecodeFloat(it.Next())) + } + return vector.NewFloat(typ, values), nil case *zed.TypeOfBool: - var vals []bool - var nullslots []uint32 - it := zcode.Bytes(bytes).Iter() + var values []bool for !it.Done() { - val := it.Next() - if val == nil { - nullslots = append(nullslots, uint32(len(vals))) - vals = append(vals, false) - } else { - vals = append(vals, zed.DecodeBool(val)) - } + values = append(values, zed.DecodeBool(it.Next())) } - return vector.NewBool(typ, vals, vector.NewNullmask(nullslots, len(vals))), nil + return vector.NewBool(typ, values), nil case *zed.TypeOfBytes: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) + var values [][]byte + for !it.Done() { + values = append(values, zed.DecodeBytes(it.Next())) + } + return vector.NewBytes(typ, values), nil case *zed.TypeOfString: - var vals []string - var nullslots []uint32 - it := zcode.Bytes(bytes).Iter() + var values []string for !it.Done() { - val := it.Next() - if val == nil { - nullslots = append(nullslots, uint32(len(vals))) - } else { - vals = append(vals, zed.DecodeString(val)) - } + values = append(values, zed.DecodeString(it.Next())) } - return vector.NewString(typ, vals, vector.NewNullmask(nullslots, len(vals))), nil + return vector.NewString(typ, values), nil case *zed.TypeOfIP: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) + var values []netip.Addr + for !it.Done() { + values = append(values, zed.DecodeIP(it.Next())) + } + return vector.NewIP(typ, values), nil case *zed.TypeOfNet: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) - case *zed.TypeOfNull: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) + var values []netip.Prefix + for !it.Done() { + values = append(values, 
zed.DecodeNet(it.Next())) + } + return vector.NewNet(typ, values), nil case *zed.TypeOfType: - return nil, fmt.Errorf("vcache.Primitive.Load TBD for %T", typ) + var values []zed.Type + for !it.Done() { + t, err := l.zctx.LookupByValue(it.Next()) + if err != nil { + return nil, err + } + values = append(values, t) + } + return vector.NewType(typ, values), nil + case *zed.TypeOfNull: + return vector.NewConst(zed.Null, 0), nil } - return nil, nil - /* - XXX - if dict := p.meta.Dict; dict != nil { - bytes := p.bytes - return func(b *zcode.Builder) error { - pos := bytes[0] - bytes = bytes[1:] - b.Append(dict[pos].Value.Bytes()) - return nil - }, nil - } - it := zcode.Iter(p.bytes) - return func(b *zcode.Builder) error { - b.Append(it.Next()) - return nil - }, nil - - /* XXX - - return nil, fmt.Errorf("internal error: vcache.Primitive.Load uknown type %T", typ) - */ + return nil, fmt.Errorf("internal error: vcache.loadPrimitive got unknown type %#v", typ) } type Const struct { diff --git a/runtime/vcache/reader.go b/runtime/vcache/reader.go index 11277a51d6..b155f19a12 100644 --- a/runtime/vcache/reader.go +++ b/runtime/vcache/reader.go @@ -1,9 +1,18 @@ package vcache -/* +import ( + "fmt" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/vector" + "github.com/brimdata/zed/zcode" + "github.com/brimdata/zed/zio" +) + type Reader struct { - object *Object - iters []iterator + object *Object + builders []vector.Builder + off int builder zcode.Builder val zed.Value @@ -12,32 +21,25 @@ type Reader struct { var _ zio.Reader = (*Reader)(nil) func (r *Reader) Read() (*zed.Value, error) { - o := r.object - - if r.off >= len(o.typeIDs) { - return nil, nil - } - - id := o.typeIDs[r.off] - r.off++ - it := r.iters[id] - - if it == nil { - var err error - it, err = o.vectors[id].NewIter(o.reader) - if err != nil { - return nil, err - } - r.iters[id] = it - } - - r.builder.Truncate() - - if err := it(&r.builder); err != nil { - return nil, err - } - - r.val = 
*zed.NewValue(o.types[id], r.builder.Bytes().Body()) - return &r.val, nil + o := r.object + if r.off >= len(o.typeKeys) { + return nil, nil + } + key := o.typeKeys[r.off] + b := r.builders[key] + if b == nil { + vec, err := o.Load(uint32(key), nil) + if err != nil { + return nil, err + } + b = vec.NewBuilder() + r.builders[key] = b + } + r.builder.Truncate() + if !b(&r.builder) { + panic(fmt.Sprintf("vector.Builder returned false for key %d at offset %d", key, r.off)) + } + r.off++ + r.val = *zed.NewValue(o.typeDict[key], r.builder.Bytes().Body()) + return &r.val, nil } -*/ diff --git a/runtime/vcache/record.go b/runtime/vcache/record.go index d154461a0c..6e59bfeea8 100644 --- a/runtime/vcache/record.go +++ b/runtime/vcache/record.go @@ -2,7 +2,6 @@ package vcache import ( "fmt" - "io" "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" @@ -14,7 +13,7 @@ import ( //XXX we need locking as multiple threads can access Native columns concurrently // should do a fast lookup on the path -func loadRecord(any *vector.Any, typ *zed.TypeRecord, path field.Path, meta *meta.Record, r io.ReaderAt) (vector.Any, error) { +func (l *loader) loadRecord(any *vector.Any, typ *zed.TypeRecord, path field.Path, meta *meta.Record) (vector.Any, error) { if *any == nil { *any = vector.NewRecord(typ) } @@ -23,6 +22,11 @@ func loadRecord(any *vector.Any, typ *zed.TypeRecord, path field.Path, meta *met return nil, fmt.Errorf("system error: vcache.loadRecord not a record type %q", zson.String(vec.Typ)) } if len(path) == 0 { + for i, f := range meta.Fields { + if _, err := l.loadVector(&vec.Fields[i], typ.Fields[i].Type, nil, f.Values); err != nil { + return nil, err + } + } return vec, nil } fieldName := path[0] @@ -30,7 +34,7 @@ func loadRecord(any *vector.Any, typ *zed.TypeRecord, path field.Path, meta *met if !ok { return nil, fmt.Errorf("system error: vcache.loadRecord no such field %q in record type %q", fieldName, zson.String(vec.Typ)) } - return loadVector(&vec.Fields[off], 
typ.Fields[off].Type, path[1:], meta.Fields[off].Values, r) + return l.loadVector(&vec.Fields[off], typ.Fields[off].Type, path[1:], meta.Fields[off].Values) } // XXX since cache is persistent across queries, does it still make sense to diff --git a/runtime/vcache/union.go b/runtime/vcache/union.go index ea6d31bc16..9fac6c55cc 100644 --- a/runtime/vcache/union.go +++ b/runtime/vcache/union.go @@ -2,16 +2,16 @@ package vcache import ( "fmt" - "io" "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" "github.com/brimdata/zed/vector" + "github.com/brimdata/zed/vng" meta "github.com/brimdata/zed/vng/vector" "github.com/brimdata/zed/zson" ) -func loadUnion(any *vector.Any, typ *zed.TypeUnion, path field.Path, m *meta.Union, r io.ReaderAt) (*vector.Union, error) { +func (l *loader) loadUnion(any *vector.Any, typ *zed.TypeUnion, path field.Path, m *meta.Union) (*vector.Union, error) { if *any == nil { *any = vector.NewUnion(typ) } @@ -19,10 +19,15 @@ func loadUnion(any *vector.Any, typ *zed.TypeUnion, path field.Path, m *meta.Uni if !ok { return nil, fmt.Errorf("system error: vcache.loadUnion not a union type %q", zson.String(vec.Typ)) } + tags, err := vng.ReadIntVector(m.Tags, l.r) + if err != nil { + return nil, err + } + vec.Tags = tags //XXX should just load paths we want here? for now, load everything. 
for k := range vec.Values { var err error - _, err = loadVector(&vec.Values[k], typ.Types[k], path, m.Values[k], r) + _, err = l.loadVector(&vec.Values[k], typ.Types[k], path, m.Values[k]) if err != nil { return nil, err } diff --git a/runtime/vcache/vector.go b/runtime/vcache/vector.go index ecccb79b23..2d91b245d9 100644 --- a/runtime/vcache/vector.go +++ b/runtime/vcache/vector.go @@ -11,18 +11,23 @@ import ( meta "github.com/brimdata/zed/vng/vector" ) -func loadVector(any *vector.Any, typ zed.Type, path field.Path, m meta.Metadata, r io.ReaderAt) (vector.Any, error) { +type loader struct { + zctx *zed.Context + r io.ReaderAt +} + +func (l *loader) loadVector(any *vector.Any, typ zed.Type, path field.Path, m meta.Metadata) (vector.Any, error) { switch m := m.(type) { case *meta.Named: - return loadVector(any, typ.(*zed.TypeNamed).Type, path, m.Values, r) + return l.loadVector(any, typ.(*zed.TypeNamed).Type, path, m.Values) case *meta.Record: - return loadRecord(any, typ.(*zed.TypeRecord), path, m, r) + return l.loadRecord(any, typ.(*zed.TypeRecord), path, m) case *meta.Primitive: if len(path) != 0 { return nil, fmt.Errorf("internal error: vcache encountered path at primitive element: %q", strings.Join(path, ".")) } if *any == nil { - v, err := loadPrimitive(typ, m, r) + v, err := l.loadPrimitive(typ, m) if err != nil { return nil, err } @@ -30,18 +35,19 @@ func loadVector(any *vector.Any, typ zed.Type, path field.Path, m meta.Metadata, } return *any, nil case *meta.Array: - return loadArray(any, typ, path, m, r) + return l.loadArray(any, typ, path, m) case *meta.Set: a := *(*meta.Array)(m) - return loadArray(any, typ, path, &a, r) + return l.loadArray(any, typ, path, &a) case *meta.Map: - return loadMap(any, typ, path, m, r) + return l.loadMap(any, typ, path, m) case *meta.Union: - return loadUnion(any, typ.(*zed.TypeUnion), path, m, r) + return l.loadUnion(any, typ.(*zed.TypeUnion), path, m) case *meta.Nulls: - return loadNulls(any, typ, path, m, r) + return 
l.loadNulls(any, typ, path, m) case *meta.Const: - return vector.NewConst(m.Value, m.Count), nil + *any = vector.NewConst(m.Value, m.Count) + return *any, nil default: return nil, fmt.Errorf("vector cache: type %T not supported", m) } diff --git a/runtime/vcache/ztests/array-copy.yaml b/runtime/vcache/ztests/array-copy.yaml index 041dbe8ae3..d83ad33c03 100644 --- a/runtime/vcache/ztests/array-copy.yaml +++ b/runtime/vcache/ztests/array-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - # This test simply converts some ZSON to VNG then runs it through # the vector cache to exercise the logic that builds values from # cached vectors. diff --git a/runtime/vcache/ztests/map-copy.yaml b/runtime/vcache/ztests/map-copy.yaml index bddf088b19..0f50f24baf 100644 --- a/runtime/vcache/ztests/map-copy.yaml +++ b/runtime/vcache/ztests/map-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - # This test simply converts some ZSON to VNG then runs it through # the vector cache to exercise the logic that builds values from # cached vectors. diff --git a/runtime/vcache/ztests/named-copy.yaml b/runtime/vcache/ztests/named-copy.yaml index ef1508b1e9..32ec47c98f 100644 --- a/runtime/vcache/ztests/named-copy.yaml +++ b/runtime/vcache/ztests/named-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - script: | zq -f vng -o test.vng - zed dev vcache copy -z test.vng diff --git a/runtime/vcache/ztests/primitive-const-copy.yaml b/runtime/vcache/ztests/primitive-const-copy.yaml new file mode 100644 index 0000000000..660d6b17f8 --- /dev/null +++ b/runtime/vcache/ztests/primitive-const-copy.yaml @@ -0,0 +1,33 @@ +# Exercise the logic that builds values from cached constant vectors. +script: | + zq -f vng -o test.vng - + zed dev vcache copy -z test.vng + +inputs: + - name: stdin + # One value per type so they'll be encoded as constant vectors. 
+ data: &stdin | + 8(uint8) + 16(uint16) + 32(uint32) + 64(uint64) + -8(int8) + -16(int16) + -32(int32) + -64 + 1h2m3s + 2022-12-04T00:00:00Z + 16.(float16) + 32.(float32) + 64. + false + 0x00 + "0" + 1.2.3.4 + 1.2.3.0/24 + + null + +outputs: + - name: stdout + data: *stdin diff --git a/runtime/vcache/ztests/primitive-copy.yaml b/runtime/vcache/ztests/primitive-copy.yaml new file mode 100644 index 0000000000..f622980a49 --- /dev/null +++ b/runtime/vcache/ztests/primitive-copy.yaml @@ -0,0 +1,53 @@ +# Exercise the logic that builds values from cached non-constant vectors. +script: | + zq -f vng -o test.vng - + zed dev vcache copy -z test.vng + +inputs: + - name: stdin + # Two different values per type (except for null) so they won't be encoded as + # constant vectors. + data: &stdin | + 8(uint8) + 80(uint8) + 16(uint16) + 1600(uint16) + 32(uint32) + 320(uint32) + 64(uint64) + 640(uint64) + -8(int8) + -80(int8) + -16(int16) + -160(int16) + -32(int32) + -320(int32) + -64 + -640 + 1h2m3s + 1h2m30s + 2022-12-04T00:00:00Z + 2022-12-04T00:00:01Z + 16.(float16) + 160.(float16) + 32.(float32) + 320.(float32) + 64. + 640. + true + false + 0x00 + 0x0000 + "0" + "00" + 1.2.3.4 + 1.2.3.40 + 1.2.3.0/24 + 1.2.3.0/25 + + + null + +outputs: + - name: stdout + data: *stdin diff --git a/runtime/vcache/ztests/record-copy.yaml b/runtime/vcache/ztests/record-copy.yaml index a6692b808b..f7f8fa781b 100644 --- a/runtime/vcache/ztests/record-copy.yaml +++ b/runtime/vcache/ztests/record-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - # This test simply converts some ZSON to VNG then runs it through # the vector cache to exercise the logic that builds values from # cached vectors. 
diff --git a/runtime/vcache/ztests/set-copy.yaml b/runtime/vcache/ztests/set-copy.yaml index 30a8aee506..255b6af921 100644 --- a/runtime/vcache/ztests/set-copy.yaml +++ b/runtime/vcache/ztests/set-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - # This test simply converts some ZSON to VNG then runs it through # the vector cache to exercise the logic that builds values from # cached vectors. diff --git a/runtime/vcache/ztests/union-copy.yaml b/runtime/vcache/ztests/union-copy.yaml index aa5f2a93b3..70fc263a9b 100644 --- a/runtime/vcache/ztests/union-copy.yaml +++ b/runtime/vcache/ztests/union-copy.yaml @@ -1,5 +1,3 @@ -skip: disabled until "zed dev vcache copy" is fixed or replaced - # This test simply converts some ZSON to VNG then runs it through # the vector cache to exercise the logic that builds values from # cached vectors. diff --git a/vector/array.go b/vector/array.go index 31e9729fbf..15af00bd3c 100644 --- a/vector/array.go +++ b/vector/array.go @@ -2,18 +2,19 @@ package vector import ( "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" ) type Array struct { mem - Typ *zed.TypeArray //XXX type array or set + Typ zed.Type // Either *zed.TypeArray or *zed.TypeSet. 
Lengths []int32 Values Any } var _ Any = (*Array)(nil) -func NewArray(typ *zed.TypeArray, lengths []int32, values Any) *Array { +func NewArray(typ zed.Type, lengths []int32, values Any) *Array { return &Array{Typ: typ, Lengths: lengths, Values: values} } @@ -22,5 +23,24 @@ func (a *Array) Type() zed.Type { } func (a *Array) NewBuilder() Builder { - return nil //XXX + _, set := zed.TypeUnder(a.Typ).(*zed.TypeSet) + valueBuilder := a.Values.NewBuilder() + var off int + return func(b *zcode.Builder) bool { + if off >= len(a.Lengths) { + return false + } + b.BeginContainer() + for i := 0; i < int(a.Lengths[off]); i++ { + if !valueBuilder(b) { + panic(off) + } + } + if set { + b.TransformContainer(zed.NormalizeSet) + } + b.EndContainer() + off++ + return true + } } diff --git a/vector/bool.go b/vector/bool.go index 02f6e7f575..a07a3f0e25 100644 --- a/vector/bool.go +++ b/vector/bool.go @@ -9,13 +9,12 @@ type Bool struct { mem Typ zed.Type Values []bool //XXX bit vector - Nulls Nullmask } var _ Any = (*Bool)(nil) -func NewBool(typ zed.Type, vals []bool, nulls Nullmask) *Bool { - return &Bool{Typ: typ, Values: vals, Nulls: nulls} +func NewBool(typ zed.Type, vals []bool) *Bool { + return &Bool{Typ: typ, Values: vals} } func (b *Bool) Type() zed.Type { @@ -24,15 +23,10 @@ func (b *Bool) Type() zed.Type { func (b *Bool) NewBuilder() Builder { vals := b.Values - nulls := b.Nulls var voff int return func(b *zcode.Builder) bool { if voff < len(vals) { - if !nulls.Has(uint32(voff)) { - b.Append(zed.EncodeBool(vals[voff])) - } else { - b.Append(nil) - } + b.Append(zed.EncodeBool(vals[voff])) voff++ return true diff --git a/vector/bytes.go b/vector/bytes.go new file mode 100644 index 0000000000..4340747cc3 --- /dev/null +++ b/vector/bytes.go @@ -0,0 +1,34 @@ +package vector + +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) + +type Bytes struct { + mem + Typ zed.Type + Values [][]byte +} + +var _ Any = (*Bytes)(nil) + +func NewBytes(typ zed.Type, values 
[][]byte) *Bytes { + return &Bytes{Typ: typ, Values: values} +} + +func (b *Bytes) Type() zed.Type { + return b.Typ +} + +func (b *Bytes) NewBuilder() Builder { + var off int + return func(zb *zcode.Builder) bool { + if off >= len(b.Values) { + return false + } + zb.Append(zed.EncodeBytes(b.Values[off])) + off++ + return true + } +} diff --git a/vector/float.go b/vector/float.go new file mode 100644 index 0000000000..53f202608d --- /dev/null +++ b/vector/float.go @@ -0,0 +1,44 @@ +package vector + +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) + +type Float struct { + mem + Typ zed.Type + Values []float64 +} + +var _ Any = (*Float)(nil) + +func NewFloat(typ zed.Type, values []float64) *Float { + return &Float{Typ: typ, Values: values} +} + +func (f *Float) Type() zed.Type { + return f.Typ +} + +func (f *Float) NewBuilder() Builder { + typeID := f.Typ.ID() + var off int + return func(b *zcode.Builder) bool { + if off >= len(f.Values) { + return false + } + switch typeID { + case zed.IDFloat16: + b.Append(zed.EncodeFloat16(float32(f.Values[off]))) + case zed.IDFloat32: + b.Append(zed.EncodeFloat32(float32(f.Values[off]))) + case zed.IDFloat64: + b.Append(zed.EncodeFloat64(f.Values[off])) + default: + panic(f.Typ) + } + off++ + return true + } +} diff --git a/vector/int.go b/vector/int.go index 8af3ef4fcc..a4742cbc42 100644 --- a/vector/int.go +++ b/vector/int.go @@ -9,13 +9,12 @@ type Int struct { mem Typ zed.Type Values []int64 - Nulls Nullmask } var _ Any = (*Int)(nil) -func NewInt(typ zed.Type, vals []int64, nulls Nullmask) *Int { - return &Int{Typ: typ, Values: vals, Nulls: nulls} +func NewInt(typ zed.Type, values []int64) *Int { + return &Int{Typ: typ, Values: values} } func (i *Int) Type() zed.Type { @@ -24,18 +23,12 @@ func (i *Int) Type() zed.Type { func (i *Int) NewBuilder() Builder { vals := i.Values - nulls := i.Nulls var voff int return func(b *zcode.Builder) bool { if voff < len(vals) { - if nulls.Has(uint32(voff)) { - 
b.Append(nil) - } else { - b.Append(zed.EncodeInt(vals[voff])) - } + b.Append(zed.EncodeInt(vals[voff])) voff++ return true - } return false } diff --git a/vector/ip.go b/vector/ip.go new file mode 100644 index 0000000000..907d66ca09 --- /dev/null +++ b/vector/ip.go @@ -0,0 +1,36 @@ +package vector + +import ( + "net/netip" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) + +type IP struct { + mem + Typ zed.Type + Values []netip.Addr +} + +var _ Any = (*IP)(nil) + +func NewIP(typ zed.Type, values []netip.Addr) *IP { + return &IP{Typ: typ, Values: values} +} + +func (i *IP) Type() zed.Type { + return i.Typ +} + +func (i *IP) NewBuilder() Builder { + var off int + return func(b *zcode.Builder) bool { + if off >= len(i.Values) { + return false + } + b.Append(zed.EncodeIP(i.Values[off])) + off++ + return true + } +} diff --git a/vector/map.go b/vector/map.go index 96acd7a4b7..a73955991c 100644 --- a/vector/map.go +++ b/vector/map.go @@ -2,19 +2,21 @@ package vector import ( "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" ) type Map struct { mem - Typ *zed.TypeMap - Keys Any - Values Any + Typ *zed.TypeMap + Lengths []int32 + Keys Any + Values Any } var _ Any = (*Map)(nil) -func NewMap(typ *zed.TypeMap, keys Any, values Any) *Map { - return &Map{Typ: typ, Keys: keys, Values: values} +func NewMap(typ *zed.TypeMap, lengths []int32, keys Any, values Any) *Map { + return &Map{Typ: typ, Lengths: lengths, Keys: keys, Values: values} } func (m *Map) Type() zed.Type { @@ -22,5 +24,26 @@ func (m *Map) Type() zed.Type { } func (m *Map) NewBuilder() Builder { - return nil //XXX + keyBuilder := m.Keys.NewBuilder() + valueBuilder := m.Values.NewBuilder() + var off int + return func(b *zcode.Builder) bool { + if off >= len(m.Lengths) { + return false + } + b.BeginContainer() + for i := 0; i < int(m.Lengths[off]); i++ { + if !keyBuilder(b) { + panic(off) + } + if !valueBuilder(b) { + panic(off) + } + } + b.TransformContainer(zed.NormalizeMap) + 
b.EndContainer() + off++ + return true + } + } diff --git a/vector/net.go b/vector/net.go new file mode 100644 index 0000000000..b306a8ae42 --- /dev/null +++ b/vector/net.go @@ -0,0 +1,36 @@ +package vector + +import ( + "net/netip" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) + +type Net struct { + mem + Typ zed.Type + Values []netip.Prefix +} + +var _ Any = (*Net)(nil) + +func NewNet(typ zed.Type, values []netip.Prefix) *Net { + return &Net{Typ: typ, Values: values} +} + +func (n *Net) Type() zed.Type { + return n.Typ +} + +func (n *Net) NewBuilder() Builder { + var off int + return func(b *zcode.Builder) bool { + if off >= len(n.Values) { + return false + } + b.Append(zed.EncodeNet(n.Values[off])) + off++ + return true + } +} diff --git a/vector/nulls.go b/vector/nulls.go index 8bab5807d9..c04a97e3a5 100644 --- a/vector/nulls.go +++ b/vector/nulls.go @@ -1,23 +1,60 @@ package vector -type Nullmask []byte //XXX change to uint64 +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) -func NewNullmask(slots []uint32, nvals int) Nullmask { - var nulls Nullmask +type Nulls struct { + mem + bitmap []byte //XXX change to uint64 + nvals int + values Any +} + +var _ Any = (*Nulls)(nil) + +func NewNulls(slots []uint32, nvals int, values Any) *Nulls { + var nulls []byte if len(slots) > 0 { nulls = make([]byte, (nvals+7)/8) for _, slot := range slots { nulls[slot>>3] |= 1 << (slot & 7) } } - return nulls + return &Nulls{bitmap: nulls, nvals: nvals, values: values} } -func (n Nullmask) Has(slot uint32) bool { - off := slot / 8 - if off >= uint32(len(n)) { +func (n *Nulls) Type() zed.Type { + return n.values.Type() +} + +func (n *Nulls) Values() Any { + return n.values +} + +func (n *Nulls) NewBuilder() Builder { + valueBuilder := n.values.NewBuilder() + var off int + return func(b *zcode.Builder) bool { + if off >= n.nvals { + return false + } + if n.IsNull(off) { + b.Append(nil) + } else if !valueBuilder(b) { + return false + } + 
off++ + return true + } +} + +func (n *Nulls) IsNull(slot int) bool { + if int(slot) > n.nvals { return false } + off := slot / 8 pos := slot & 7 - return (n[off] & (1 << pos)) != 0 + return n.bitmap[off]&(1<= len(t.Values) { + return false + } + b.Append(zed.EncodeTypeValue(t.Values[off])) + off++ + return true + } +} diff --git a/vector/uint.go b/vector/uint.go index ca9a5f2207..b2903b3c64 100644 --- a/vector/uint.go +++ b/vector/uint.go @@ -9,13 +9,12 @@ type Uint struct { mem Typ zed.Type Values []uint64 - Nulls Nullmask } var _ Any = (*Uint)(nil) -func NewUint(typ zed.Type, vals []uint64, nulls Nullmask) *Uint { - return &Uint{Typ: typ, Values: vals, Nulls: nulls} +func NewUint(typ zed.Type, values []uint64) *Uint { + return &Uint{Typ: typ, Values: values} } func (u *Uint) Type() zed.Type { @@ -24,18 +23,12 @@ func (u *Uint) Type() zed.Type { func (u *Uint) NewBuilder() Builder { vals := u.Values - nulls := u.Nulls var voff int return func(b *zcode.Builder) bool { if voff < len(vals) { - if !nulls.Has(uint32(voff)) { - b.Append(zed.EncodeUint(vals[voff])) - } else { - b.Append(nil) - } + b.Append(zed.EncodeUint(vals[voff])) voff++ return true - } return false } diff --git a/vector/union.go b/vector/union.go index 5ecfb759bb..f12b18bb88 100644 --- a/vector/union.go +++ b/vector/union.go @@ -2,11 +2,13 @@ package vector import ( "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" ) type Union struct { mem Typ *zed.TypeUnion + Tags []int32 Values []Any } @@ -21,5 +23,23 @@ func (u *Union) Type() zed.Type { } func (u *Union) NewBuilder() Builder { - return nil //XXX + var valueBuilders []Builder + for _, v := range u.Values { + valueBuilders = append(valueBuilders, v.NewBuilder()) + } + var off int + return func(b *zcode.Builder) bool { + if off >= len(u.Tags) { + return false + } + tag := u.Tags[off] + b.BeginContainer() + b.Append(zed.EncodeInt(int64(tag))) + if !valueBuilders[tag](b) { + return false + } + b.EndContainer() + off++ + return true + } }