From 83d8870b7e7b9c5cbf3900c471b0b7b45e33c6cd Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 28 Feb 2024 14:17:11 -0600 Subject: [PATCH 1/3] Deduplicate inlined dict type info to reduce RAM This change deduplicates Cadence dictionary type and composite type info, resulting in reduced memory and also persistent storage. More specifically, this encodes inlined atree slab extra data section as two-element array: - array of deduplicated type info - array of deduplicated extra data with type info index --- array.go | 10 +- array_debug.go | 17 +++- array_test.go | 86 ++++++++++++++--- map.go | 10 +- map_test.go | 255 +++++++++++++++++++++++++++++++++++++------------ storable.go | 2 + typeinfo.go | 195 +++++++++++++++++++++++++++++++------ 7 files changed, 464 insertions(+), 111 deletions(-) diff --git a/array.go b/array.go index d8f39487..d5e4fc70 100644 --- a/array.go +++ b/array.go @@ -301,13 +301,13 @@ func (a *ArrayExtraData) isExtraData() bool { // Encode encodes extra data as CBOR array: // // [type info] -func (a *ArrayExtraData) Encode(enc *Encoder) error { +func (a *ArrayExtraData) Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error { err := enc.CBOR.EncodeArrayHead(arrayExtraDataLength) if err != nil { return NewEncodingError(err) } - err = a.TypeInfo.Encode(enc.CBOR) + err = encodeTypeInfo(enc, a.TypeInfo) if err != nil { // Wrap err as external error (if needed) because err is returned by TypeInfo interface. return wrapErrorfAsExternalErrorIfNeeded(err, "failed to encode type info") @@ -840,7 +840,8 @@ func (a *ArrayDataSlab) Encode(enc *Encoder) error { // Encode extra data if a.extraData != nil { - err = a.extraData.Encode(enc) + // Use defaultEncodeTypeInfo to encode root level TypeInfo as is. + err = a.extraData.Encode(enc, defaultEncodeTypeInfo) if err != nil { // err is already categorized by ArrayExtraData.Encode(). 
return err @@ -1738,7 +1739,8 @@ func (a *ArrayMetaDataSlab) Encode(enc *Encoder) error { // Encode extra data if present if a.extraData != nil { - err = a.extraData.Encode(enc) + // Use defaultEncodeTypeInfo to encode root level TypeInfo as is. + err = a.extraData.Encode(enc, defaultEncodeTypeInfo) if err != nil { // Don't need to wrap because err is already categorized by ArrayExtraData.Encode(). return err diff --git a/array_debug.go b/array_debug.go index eb0d2fe7..7ffa335d 100644 --- a/array_debug.go +++ b/array_debug.go @@ -861,12 +861,27 @@ func hasInlinedComposite(data []byte) (bool, error) { // Parse inlined extra data to find compact map extra data. dec := cbor.NewStreamDecoder(bytes.NewBuffer(data)) + count, err := dec.DecodeArrayHead() if err != nil { return false, NewDecodingError(err) } + if count != inlinedExtraDataArrayCount { + return false, NewDecodingError(fmt.Errorf("failed to decode inlined extra data, expect %d elements, got %d elements", inlinedExtraDataArrayCount, count)) + } - for i := uint64(0); i < count; i++ { + // Skip element 0 (inlined type info) + err = dec.Skip() + if err != nil { + return false, NewDecodingError(err) + } + + // Decoding element 1 (inlined extra data) + extraDataCount, err := dec.DecodeArrayHead() + if err != nil { + return false, NewDecodingError(err) + } + for i := uint64(0); i < extraDataCount; i++ { tagNum, err := dec.DecodeTagNumber() if err != nil { return false, NewDecodingError(err) diff --git a/array_test.go b/array_test.go index 7c64d61c..58a0c9cf 100644 --- a/array_test.go +++ b/array_test.go @@ -3184,11 +3184,19 @@ func TestArrayEncodeDecode(t *testing.T) { 0x18, 0x2a, // inlined extra data + 0x82, + // element 0: array of type info 0x81, - // inlined array extra data + // type info + 0x18, 0x2b, + // element 1: array of extra data + 0x81, + // array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + // array type info ref + 0xd8, 0xf6, + 0x00, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, 
@@ -3266,14 +3274,22 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, + // element 0: array of inlined type info + 0x82, + 0x18, 0x2c, + 0x18, 0x2b, + // element 1: array of inlined extra data + 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x01, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3355,13 +3371,21 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, + // element 0: array of inlined type info + 0x82, + 0x18, 0x2c, + 0x18, 0x2b, + // element 1: array of inlined extra data + 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x01, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3454,23 +3478,35 @@ func TestArrayEncodeDecode(t *testing.T) { 0x18, 0x2a, // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x84, + 0x18, 0x2c, + 0x18, 0x2b, + 0x18, 0x2e, + 0x18, 0x2d, + // element 1: array of inlined extra data 0x84, // typeInfo3 0xd8, 0xf7, 0x81, - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, // typeInfo2 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x01, // typeInfo5 0xd8, 0xf7, 0x81, - 0x18, 0x2e, + 0xd8, 0xf6, + 0x02, // typeInfo4 0xd8, 0xf7, 0x81, - 0x18, 0x2d, + 0xd8, 0xf6, + 0x03, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3595,11 +3631,17 @@ func TestArrayEncodeDecode(t *testing.T) { // array data slab flag 0x00, // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements @@ -3744,13 +3786,21 @@ func TestArrayEncodeDecode(t *testing.T) { 0x00, // inlined extra data 
0x82, + // element 0: array of inlined type info + 0x82, + 0x18, 0x2c, + 0x18, 0x2b, + // element 1: array of inlined extra data + 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x01, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements @@ -4064,12 +4114,18 @@ func TestArrayEncodeDecode(t *testing.T) { // array data slab flag (has pointer) 0x40, - // inlined array of extra data + // inlined extra data + 0x82, + // element 0: array of type info + 0x81, + 0x18, 0x2c, + // element 1: array of extra data 0x81, // type info 0xd8, 0xf7, 0x81, - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, diff --git a/map.go b/map.go index 57891d7a..bf142744 100644 --- a/map.go +++ b/map.go @@ -497,14 +497,14 @@ func (m *MapExtraData) isExtraData() bool { // Encode encodes extra data as CBOR array: // // [type info, count, seed] -func (m *MapExtraData) Encode(enc *Encoder) error { +func (m *MapExtraData) Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error { err := enc.CBOR.EncodeArrayHead(mapExtraDataLength) if err != nil { return NewEncodingError(err) } - err = m.TypeInfo.Encode(enc.CBOR) + err = encodeTypeInfo(enc, m.TypeInfo) if err != nil { // Wrap err as external error (if needed) because err is returned by TypeInfo interface. return wrapErrorfAsExternalErrorIfNeeded(err, "failed to encode type info") @@ -2917,7 +2917,8 @@ func (m *MapDataSlab) Encode(enc *Encoder) error { // Encode extra data if m.extraData != nil { - err = m.extraData.Encode(enc) + // Use defaultEncodeTypeInfo to encode root level TypeInfo as is. + err = m.extraData.Encode(enc, defaultEncodeTypeInfo) if err != nil { // Don't need to wrap error as external error because err is already categorized by MapExtraData.Encode(). 
return err @@ -3918,7 +3919,8 @@ func (m *MapMetaDataSlab) Encode(enc *Encoder) error { // Encode extra data if present if m.extraData != nil { - err = m.extraData.Encode(enc) + // Use defaultEncodeTypeInfo to encode root level TypeInfo as is. + err = m.extraData.Encode(enc, defaultEncodeTypeInfo) if err != nil { // Don't need to wrap error as external error because err is already categorized by MapExtraData.Encode(). return err diff --git a/map_test.go b/map_test.go index ba8ca3a0..f80c8dcb 100644 --- a/map_test.go +++ b/map_test.go @@ -7660,12 +7660,18 @@ func TestMapEncodeDecode(t *testing.T) { // flag: has inlined slab + map data 0x08, - // inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // inlined array extra data 0xd8, 0xf7, 0x81, - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // the following encoded data is valid CBOR @@ -7723,7 +7729,7 @@ func TestMapEncodeDecode(t *testing.T) { require.Equal(t, 2, len(meta.childrenHeaders)) require.Equal(t, uint32(len(stored[id2])), meta.childrenHeaders[0].size) - const inlinedExtraDataSize = 6 + const inlinedExtraDataSize = 11 require.Equal(t, uint32(len(stored[id3])-inlinedExtraDataSize+slabIDSize), meta.childrenHeaders[1].size) // Decode data to new storage @@ -7803,14 +7809,20 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 2 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x82, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -7820,7 +7832,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -7992,14 +8005,21 @@ func TestMapEncodeDecode(t *testing.T) { // seed 
0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 2 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x82, + 0x18, 0x2c, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x82, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8009,7 +8029,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x01, // count: 1 0x01, // seed @@ -8183,14 +8204,20 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8200,7 +8227,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8210,7 +8238,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8220,7 +8249,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8453,14 +8483,23 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of type info + 0x84, + 0x18, 0x2c, + 0x18, 0x2e, + 0x18, 0x2b, + 0x18, 0x2d, + // element 1: array of extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info: 44 - 0x18, 0x2c, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8471,7 +8510,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info: 
46 - 0x18, 0x2e, + 0xd8, 0xf6, + 0x01, // count: 1 0x01, // seed @@ -8482,7 +8522,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info: 43 - 0x18, 0x2b, + 0xd8, 0xf6, + 0x02, // count: 1 0x01, // seed @@ -8493,7 +8534,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info: 45 - 0x18, 0x2d, + 0xd8, 0xf6, + 0x03, // count: 1 0x01, // seed @@ -8720,14 +8762,20 @@ func TestMapEncodeDecode(t *testing.T) { // flag: map data 0x08, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8737,7 +8785,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8747,7 +8796,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8756,7 +8806,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8903,14 +8954,20 @@ func TestMapEncodeDecode(t *testing.T) { // flag: map data 0x08, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8920,7 +8977,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8930,7 +8988,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -8939,7 +8998,8 @@ func 
TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -9201,14 +9261,23 @@ func TestMapEncodeDecode(t *testing.T) { // flag: map data 0x08, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x84, + 0x18, 0x2b, + 0x18, 0x2c, + 0x18, 0x2d, + 0x18, 0x2e, + // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -9218,7 +9287,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2c, + 0xd8, 0xf6, + 0x01, // count: 1 0x01, // seed @@ -9228,7 +9298,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2d, + 0xd8, 0xf6, + 0x02, // count: 1 0x01, // seed @@ -9237,7 +9308,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2e, + 0xd8, 0xf6, + 0x03, // count: 1 0x01, // seed @@ -9384,14 +9456,23 @@ func TestMapEncodeDecode(t *testing.T) { // flag: map data 0x08, - // 4 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x84, + 0x18, 0x2b, + 0x18, 0x2c, + 0x18, 0x2d, + 0x18, 0x2e, + // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -9401,7 +9482,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2c, + 0xd8, 0xf6, + 0x01, // count: 1 0x01, // seed @@ -9411,7 +9493,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2d, + 0xd8, 0xf6, + 0x02, // count: 1 0x01, // seed @@ -9420,7 +9503,8 @@ func TestMapEncodeDecode(t *testing.T) { 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2e, + 0xd8, 0xf6, + 0x03, // count: 1 0x01, // seed @@ -10510,14 +10594,20 @@ func TestMapEncodeDecode(t 
*testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // array of inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -10985,14 +11075,20 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // array of inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // element 0 // inlined array extra data 0xd8, 0xf7, 0x81, // type info - 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // the following encoded data is valid CBOR @@ -11280,7 +11376,12 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 1 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0xd8, 0xf6, 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // element 0 // inlined composite extra data @@ -11289,7 +11390,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count 0x01, // seed @@ -11448,7 +11550,12 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 1 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0xd8, 0xf6, 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // element 0 // inlined composite extra data @@ -11457,7 +11564,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -11623,7 +11731,12 @@ func TestMapEncodeDecode(t *testing.T) { // 
seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 1 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0xd8, 0xf6, 0x18, 0x2b, + // element 1: array of inlined extra data 0x81, // element 0 // inlined composite extra data @@ -11632,7 +11745,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -11813,7 +11927,12 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 3 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0xd8, 0xf6, 0x18, 0x2b, + // element 1: array of inlined extra data 0x83, // element 0 // inlined composite extra data @@ -11822,7 +11941,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -11841,7 +11961,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -11860,7 +11981,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -12049,7 +12171,12 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 2 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x81, + 0xd8, 0xf6, 0x18, 0x2b, + // element 1: array of inlined extra data 0x82, // element 0 // inlined map extra data @@ -12058,7 +12185,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -12076,7 +12204,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 
0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 1 0x01, // seed @@ -12247,7 +12376,13 @@ func TestMapEncodeDecode(t *testing.T) { // seed 0x1b, 0x52, 0xa8, 0x78, 0x3, 0x85, 0x2c, 0xaa, 0x49, - // 2 inlined slab extra data + // inlined extra data + 0x82, + // element 0: array of inlined type info + 0x82, + 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, 0x18, 0x2c, + // element 1: array of inlined extra data 0x82, // element 0 // inlined composite extra data @@ -12256,7 +12391,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2b, + 0xd8, 0xf6, + 0x00, // count: 2 0x02, // seed @@ -12274,7 +12410,8 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, 0x18, 0x2c, + 0xd8, 0xf6, + 0x01, // count: 2 0x02, // seed diff --git a/storable.go b/storable.go index a59529c8..634c4572 100644 --- a/storable.go +++ b/storable.go @@ -76,6 +76,8 @@ const ( // As of Oct. 2, 2023, Cadence uses tag numbers from 128 to 224. // See runtime/interpreter/encode.go at github.com/onflow/cadence. + CBORTagTypeInfoRef = 246 + CBORTagInlinedArrayExtraData = 247 CBORTagInlinedMapExtraData = 248 CBORTagInlinedCompactMapExtraData = 249 diff --git a/typeinfo.go b/typeinfo.go index 86c9fe67..a2eacddb 100644 --- a/typeinfo.go +++ b/typeinfo.go @@ -41,9 +41,19 @@ type TypeInfoDecoder func( error, ) +// encodeTypeInfo encodes TypeInfo either: +// - as is (for TypeInfo in root slab extra data section), or +// - as index of inlined TypeInfos (for TypeInfo in inlined slab extra data section) +type encodeTypeInfo func(*Encoder, TypeInfo) error + +// defaultEncodeTypeInfo encodes TypeInfo as is. +func defaultEncodeTypeInfo(enc *Encoder, typeInfo TypeInfo) error { + return typeInfo.Encode(enc.CBOR) +} + type ExtraData interface { isExtraData() bool - Encode(enc *Encoder) error + Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error } // compactMapExtraData is used for inlining compact values. 
@@ -64,14 +74,14 @@ func (c *compactMapExtraData) isExtraData() bool { return true } -func (c *compactMapExtraData) Encode(enc *Encoder) error { +func (c *compactMapExtraData) Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error { err := enc.CBOR.EncodeArrayHead(compactMapExtraDataLength) if err != nil { return NewEncodingError(err) } // element 0: map extra data - err = c.mapExtraData.Encode(enc) + err = c.mapExtraData.Encode(enc, encodeTypeInfo) if err != nil { return err } @@ -200,27 +210,64 @@ type compactMapTypeInfo struct { keys []ComparableStorable } +type extraDataInfo struct { + data ExtraData + typeInfoIndex int +} + type InlinedExtraData struct { - extraData []ExtraData - compactMapTypes map[string]compactMapTypeInfo - arrayTypes map[string]int + extraData []extraDataInfo // Used to encode deduplicated ExtraData in order + typeInfo []TypeInfo // Used to encode deduplicated TypeInfo in order + compactMapTypeSet map[string]compactMapTypeInfo // Used to deduplicate compactMapExtraData by TypeInfo.Identifier() + sorted field names + arrayExtraDataSet map[string]int // Used to deduplicate arrayExtraData by TypeInfo.Identifier() + typeInfoSet map[string]int // Used to deduplicate TypeInfo by TypeInfo.Identifier() } func newInlinedExtraData() *InlinedExtraData { + // Maps used for deduplication are initialized lazily. return &InlinedExtraData{} } -// Encode encodes inlined extra data as CBOR array. 
+const inlinedExtraDataArrayCount = 2 + +// Encode encodes inlined extra data as 2-element array: +// +// +-----------------------+------------------------+ +// | [+ inlined type info] | [+ inlined extra data] | +// +-----------------------+------------------------+ func (ied *InlinedExtraData) Encode(enc *Encoder) error { - err := enc.CBOR.EncodeArrayHead(uint64(len(ied.extraData))) + var err error + + err = enc.CBOR.EncodeArrayHead(inlinedExtraDataArrayCount) if err != nil { return NewEncodingError(err) } - var tagNum uint64 + // element 0: deduplicated array of type info + err = enc.CBOR.EncodeArrayHead(uint64(len(ied.typeInfo))) + if err != nil { + return NewEncodingError(err) + } + + // Encode inlined type info + for _, typeInfo := range ied.typeInfo { + err = typeInfo.Encode(enc.CBOR) + if err != nil { + return NewEncodingError(err) + } + } + + // element 1: deduplicated array of extra data + err = enc.CBOR.EncodeArrayHead(uint64(len(ied.extraData))) + if err != nil { + return NewEncodingError(err) + } + // Encode inlined extra data for _, extraData := range ied.extraData { - switch extraData.(type) { + var tagNum uint64 + + switch extraData.data.(type) { case *ArrayExtraData: tagNum = CBORTagInlinedArrayExtraData @@ -239,7 +286,25 @@ func (ied *InlinedExtraData) Encode(enc *Encoder) error { return NewEncodingError(err) } - err = extraData.Encode(enc) + err = extraData.data.Encode(enc, func(enc *Encoder, typeInfo TypeInfo) error { + id := typeInfo.Identifier() + index, exist := ied.typeInfoSet[id] + if !exist { + return NewEncodingError(fmt.Errorf("failed to encode type info ref %s (%T)", id, typeInfo)) + } + + err := enc.CBOR.EncodeTagHead(CBORTagTypeInfoRef) + if err != nil { + return NewEncodingError(err) + } + + err = enc.CBOR.EncodeUint64(uint64(index)) + if err != nil { + return NewEncodingError(err) + } + + return nil + }) if err != nil { return err } @@ -267,12 +332,60 @@ func newInlinedExtraDataFromData( return nil, nil, NewDecodingError(err) } - if 
count == 0 { + if count != inlinedExtraDataArrayCount { + return nil, nil, NewDecodingError(fmt.Errorf("failed to decode inlined extra data: expect %d elements, got %d elements", inlinedExtraDataArrayCount, count)) + } + + // element 0: array of deduplicated type info + typeInfoCount, err := dec.DecodeArrayHead() + if err != nil { + return nil, nil, NewDecodingError(err) + } + + if typeInfoCount == 0 { + return nil, nil, NewDecodingError(fmt.Errorf("failed to decode inlined extra data: expect at least one inlined type info")) + } + + inlinedTypeInfo := make([]TypeInfo, typeInfoCount) + for i := uint64(0); i < typeInfoCount; i++ { + inlinedTypeInfo[i], err = decodeTypeInfo(dec) + if err != nil { + return nil, nil, err + } + } + + typeInfoRefDecoder := func(decoder *cbor.StreamDecoder) (TypeInfo, error) { + tagNum, err := decoder.DecodeTagNumber() + if err != nil { + return nil, err + } + if tagNum != CBORTagTypeInfoRef { + return nil, NewDecodingError(fmt.Errorf("failed to decode type info ref: expect tag number %d, got %d", CBORTagTypeInfoRef, tagNum)) + } + + index, err := decoder.DecodeUint64() + if err != nil { + return nil, NewDecodingError(err) + } + if index >= uint64(len(inlinedTypeInfo)) { + return nil, NewDecodingError(fmt.Errorf("failed to decode type info ref: expect index < %d, got %d", len(inlinedTypeInfo), index)) + } + + return inlinedTypeInfo[int(index)], nil + } + + // element 1: array of deduplicated extra data info + extraDataCount, err := dec.DecodeArrayHead() + if err != nil { + return nil, nil, NewDecodingError(err) + } + + if extraDataCount == 0 { return nil, nil, NewDecodingError(fmt.Errorf("failed to decode inlined extra data: expect at least one inlined extra data")) } - inlinedExtraData := make([]ExtraData, count) - for i := uint64(0); i < count; i++ { + inlinedExtraData := make([]ExtraData, extraDataCount) + for i := uint64(0); i < extraDataCount; i++ { tagNum, err := dec.DecodeTagNumber() if err != nil { return nil, nil, 
NewDecodingError(err) @@ -280,19 +393,19 @@ func newInlinedExtraDataFromData( switch tagNum { case CBORTagInlinedArrayExtraData: - inlinedExtraData[i], err = newArrayExtraData(dec, decodeTypeInfo) + inlinedExtraData[i], err = newArrayExtraData(dec, typeInfoRefDecoder) if err != nil { return nil, nil, err } case CBORTagInlinedMapExtraData: - inlinedExtraData[i], err = newMapExtraData(dec, decodeTypeInfo) + inlinedExtraData[i], err = newMapExtraData(dec, typeInfoRefDecoder) if err != nil { return nil, nil, err } case CBORTagInlinedCompactMapExtraData: - inlinedExtraData[i], err = newCompactMapExtraData(dec, decodeTypeInfo, decodeStorable) + inlinedExtraData[i], err = newCompactMapExtraData(dec, typeInfoRefDecoder, decodeStorable) if err != nil { return nil, nil, err } @@ -305,31 +418,55 @@ func newInlinedExtraDataFromData( return inlinedExtraData, data[dec.NumBytesDecoded():], nil } +// addTypeInfo returns index of deduplicated type info. +func (ied *InlinedExtraData) addTypeInfo(typeInfo TypeInfo) int { + if ied.typeInfoSet == nil { + ied.typeInfoSet = make(map[string]int) + } + + id := typeInfo.Identifier() + index, exist := ied.typeInfoSet[id] + if exist { + return index + } + + index = len(ied.typeInfo) + ied.typeInfo = append(ied.typeInfo, typeInfo) + ied.typeInfoSet[id] = index + + return index +} + // addArrayExtraData returns index of deduplicated array extra data. // Array extra data is deduplicated by array type info ID because array // extra data only contains type info. 
func (ied *InlinedExtraData) addArrayExtraData(data *ArrayExtraData) int { - if ied.arrayTypes == nil { - ied.arrayTypes = make(map[string]int) + if ied.arrayExtraDataSet == nil { + ied.arrayExtraDataSet = make(map[string]int) } id := data.TypeInfo.Identifier() - index, exist := ied.arrayTypes[id] + index, exist := ied.arrayExtraDataSet[id] if exist { return index } + typeInfoIndex := ied.addTypeInfo(data.TypeInfo) + index = len(ied.extraData) - ied.extraData = append(ied.extraData, data) - ied.arrayTypes[id] = index + ied.extraData = append(ied.extraData, extraDataInfo{data, typeInfoIndex}) + ied.arrayExtraDataSet[id] = index + return index } // addMapExtraData returns index of map extra data. // Map extra data is not deduplicated because it also contains count and seed. func (ied *InlinedExtraData) addMapExtraData(data *MapExtraData) int { + typeInfoIndex := ied.addTypeInfo(data.TypeInfo) + index := len(ied.extraData) - ied.extraData = append(ied.extraData, data) + ied.extraData = append(ied.extraData, extraDataInfo{data, typeInfoIndex}) return index } @@ -341,12 +478,12 @@ func (ied *InlinedExtraData) addCompactMapExtraData( keys []ComparableStorable, ) (int, []ComparableStorable) { - if ied.compactMapTypes == nil { - ied.compactMapTypes = make(map[string]compactMapTypeInfo) + if ied.compactMapTypeSet == nil { + ied.compactMapTypeSet = make(map[string]compactMapTypeInfo) } id := makeCompactMapTypeID(data.TypeInfo, keys) - info, exist := ied.compactMapTypes[id] + info, exist := ied.compactMapTypeSet[id] if exist { return info.index, info.keys } @@ -357,10 +494,12 @@ func (ied *InlinedExtraData) addCompactMapExtraData( keys: keys, } + typeInfoIndex := ied.addTypeInfo(data.TypeInfo) + index := len(ied.extraData) - ied.extraData = append(ied.extraData, compactMapData) + ied.extraData = append(ied.extraData, extraDataInfo{compactMapData, typeInfoIndex}) - ied.compactMapTypes[id] = compactMapTypeInfo{ + ied.compactMapTypeSet[id] = compactMapTypeInfo{ keys: keys, 
index: index, } From 68259507247d0ecea6c2b59310849324375d3abe Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Wed, 28 Feb 2024 15:11:07 -0600 Subject: [PATCH 2/3] Preallocate map in FastCommit --- storage.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage.go b/storage.go index 0b324381..934678ec 100644 --- a/storage.go +++ b/storage.go @@ -910,7 +910,7 @@ func (s *PersistentSlabStorage) FastCommit(numWorkers int) error { // process the results while encoders are working // we need to capture them inside a map // again so we can apply them in order of keys - encSlabByID := make(map[SlabID][]byte) + encSlabByID := make(map[SlabID][]byte, len(keysWithOwners)) for i := 0; i < len(keysWithOwners); i++ { result := <-results // if any error return From 35fdb7e4e109b999d627cafe9ff89df86e21dd24 Mon Sep 17 00:00:00 2001 From: Faye Amacker <33205765+fxamacker@users.noreply.github.com> Date: Fri, 8 Mar 2024 17:08:14 -0600 Subject: [PATCH 3/3] Deduplicate inlined type info if repeated --- array.go | 4 + array_test.go | 67 +++++----------- map.go | 4 + map_test.go | 129 ++++++++----------------------- typeinfo.go | 209 +++++++++++++++++++++++++++++++------------------- 5 files changed, 194 insertions(+), 219 deletions(-) diff --git a/array.go b/array.go index d5e4fc70..69affd85 100644 --- a/array.go +++ b/array.go @@ -298,6 +298,10 @@ func (a *ArrayExtraData) isExtraData() bool { return true } +func (a *ArrayExtraData) Type() TypeInfo { + return a.TypeInfo +} + // Encode encodes extra data as CBOR array: // // [type info] diff --git a/array_test.go b/array_test.go index 58a0c9cf..8f42abac 100644 --- a/array_test.go +++ b/array_test.go @@ -3186,17 +3186,14 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of type info - 0x81, - // type info - 0x18, 0x2b, + 0x80, // element 1: array of extra data 0x81, // array extra data 0xd8, 0xf7, 0x81, // array type info ref - 
0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3275,21 +3272,17 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x82, - 0x18, 0x2c, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x01, + 0x18, 0x2b, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3372,20 +3365,16 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x82, - 0x18, 0x2c, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x01, + 0x18, 0x2b, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3480,33 +3469,25 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x84, - 0x18, 0x2c, - 0x18, 0x2b, - 0x18, 0x2e, - 0x18, 0x2d, + 0x80, // element 1: array of inlined extra data 0x84, // typeInfo3 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, // typeInfo2 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x01, + 0x18, 0x2b, // typeInfo5 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x02, + 0x18, 0x2e, // typeInfo4 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x03, + 0x18, 0x2d, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x02, @@ -3633,15 +3614,13 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements @@ -3787,20 +3766,16 @@ func 
TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined extra data - 0x82, - 0x18, 0x2c, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x82, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x01, + 0x18, 0x2b, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, // CBOR encoded array elements @@ -4117,15 +4092,13 @@ func TestArrayEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of type info - 0x81, - 0x18, 0x2c, + 0x80, // element 1: array of extra data 0x81, // type info 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, // CBOR encoded array head (fixed size 3 byte) 0x99, 0x00, 0x0b, diff --git a/map.go b/map.go index bf142744..d34af66c 100644 --- a/map.go +++ b/map.go @@ -494,6 +494,10 @@ func (m *MapExtraData) isExtraData() bool { return true } +func (m *MapExtraData) Type() TypeInfo { + return m.TypeInfo +} + // Encode encodes extra data as CBOR array: // // [type info, count, seed] diff --git a/map_test.go b/map_test.go index f80c8dcb..397bdce1 100644 --- a/map_test.go +++ b/map_test.go @@ -7663,15 +7663,13 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // inlined array extra data 0xd8, 0xf7, 0x81, - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // the following encoded data is valid CBOR @@ -7729,7 +7727,7 @@ func TestMapEncodeDecode(t *testing.T) { require.Equal(t, 2, len(meta.childrenHeaders)) require.Equal(t, uint32(len(stored[id2])), meta.childrenHeaders[0].size) - const inlinedExtraDataSize = 11 + const inlinedExtraDataSize = 8 require.Equal(t, uint32(len(stored[id3])-inlinedExtraDataSize+slabIDSize), meta.childrenHeaders[1].size) // Decode data to new storage @@ -7827,7 +7825,6 @@ func TestMapEncodeDecode(t *testing.T) { 0x01, // seed 0x1b, 0xa9, 0x3a, 
0x2d, 0x6f, 0x53, 0x49, 0xaa, 0xdd, - // element 1 // inlined map extra data 0xd8, 0xf8, 0x83, @@ -8008,18 +8005,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x82, - 0x18, 0x2c, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x82, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, // count: 1 0x01, // seed @@ -8028,9 +8021,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x01, + 0x18, 0x2b, // count: 1 0x01, // seed @@ -8486,20 +8477,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of type info - 0x84, - 0x18, 0x2c, - 0x18, 0x2e, - 0x18, 0x2b, - 0x18, 0x2d, + 0x80, // element 1: array of extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, - // type info: 44 - 0xd8, 0xf6, - 0x00, + 0x18, 0x2c, // count: 1 0x01, // seed @@ -8509,9 +8494,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info: 46 - 0xd8, 0xf6, - 0x01, + 0x18, 0x2e, // count: 1 0x01, // seed @@ -8521,9 +8504,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info: 43 - 0xd8, 0xf6, - 0x02, + 0x18, 0x2b, // count: 1 0x01, // seed @@ -8533,9 +8514,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info: 45 - 0xd8, 0xf6, - 0x03, + 0x18, 0x2d, // count: 1 0x01, // seed @@ -9264,20 +9243,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x84, - 0x18, 0x2b, - 0x18, 0x2c, - 0x18, 0x2d, - 0x18, 0x2e, + 0x80, // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // count: 1 0x01, // seed @@ -9286,9 +9259,7 @@ func 
TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x01, + 0x18, 0x2c, // count: 1 0x01, // seed @@ -9297,9 +9268,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x02, + 0x18, 0x2d, // count: 1 0x01, // seed @@ -9307,9 +9276,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x03, + 0x18, 0x2e, // count: 1 0x01, // seed @@ -9459,20 +9426,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x84, - 0x18, 0x2b, - 0x18, 0x2c, - 0x18, 0x2d, - 0x18, 0x2e, + 0x80, // element 1: array of inlined extra data 0x84, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // count: 1 0x01, // seed @@ -9481,9 +9442,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x01, + 0x18, 0x2c, // count: 1 0x01, // seed @@ -9492,9 +9451,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x02, + 0x18, 0x2d, // count: 1 0x01, // seed @@ -9502,9 +9459,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x03, + 0x18, 0x2e, // count: 1 0x01, // seed @@ -10597,17 +10552,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // element 0 // inlined map extra data 0xd8, 0xf8, 0x83, - // type info - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // count: 1 0x01, // seed @@ -11078,17 +11030,14 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0x18, 0x2b, + 0x80, // element 1: array 
of inlined extra data 0x81, // element 0 // inlined array extra data 0xd8, 0xf7, 0x81, - // type info - 0xd8, 0xf6, - 0x00, + 0x18, 0x2b, // the following encoded data is valid CBOR @@ -11379,8 +11328,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0xd8, 0xf6, 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // element 0 @@ -11390,8 +11338,7 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, - 0x00, + 0xd8, 0xf6, 0x18, 0x2b, // count 0x01, // seed @@ -11553,8 +11500,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0xd8, 0xf6, 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // element 0 @@ -11564,8 +11510,7 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, - 0x00, + 0xd8, 0xf6, 0x18, 0x2b, // count: 2 0x02, // seed @@ -11734,8 +11679,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x81, - 0xd8, 0xf6, 0x18, 0x2b, + 0x80, // element 1: array of inlined extra data 0x81, // element 0 @@ -11745,8 +11689,7 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, - 0x00, + 0xd8, 0xf6, 0x18, 0x2b, // count: 2 0x02, // seed @@ -12379,9 +12322,7 @@ func TestMapEncodeDecode(t *testing.T) { // inlined extra data 0x82, // element 0: array of inlined type info - 0x82, - 0xd8, 0xf6, 0x18, 0x2b, - 0xd8, 0xf6, 0x18, 0x2c, + 0x80, // element 1: array of inlined extra data 0x82, // element 0 @@ -12391,8 +12332,7 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, - 0x00, + 0xd8, 0xf6, 0x18, 0x2b, // count: 2 0x02, // seed @@ -12410,8 +12350,7 @@ func TestMapEncodeDecode(t *testing.T) { // map extra data 0x83, // type info - 0xd8, 0xf6, - 0x01, + 0xd8, 0xf6, 
0x18, 0x2c, // count: 2 0x02, // seed diff --git a/typeinfo.go b/typeinfo.go index a2eacddb..61cdcc49 100644 --- a/typeinfo.go +++ b/typeinfo.go @@ -19,6 +19,7 @@ package atree import ( + "bytes" "encoding/binary" "fmt" "sort" @@ -51,8 +52,49 @@ func defaultEncodeTypeInfo(enc *Encoder, typeInfo TypeInfo) error { return typeInfo.Encode(enc.CBOR) } +func decodeTypeInfoRefIfNeeded(inlinedTypeInfo []TypeInfo, defaultTypeInfoDecoder TypeInfoDecoder) TypeInfoDecoder { + if len(inlinedTypeInfo) == 0 { + return defaultTypeInfoDecoder + } + + return func(decoder *cbor.StreamDecoder) (TypeInfo, error) { + rawTypeInfo, err := decoder.DecodeRawBytes() + if err != nil { + return nil, NewDecodingError(fmt.Errorf("failed to decode raw type info: %w", err)) + } + + if len(rawTypeInfo) > len(typeInfoRefTagHeadAndTagNumber) && + bytes.Equal( + rawTypeInfo[:len(typeInfoRefTagHeadAndTagNumber)], + typeInfoRefTagHeadAndTagNumber) { + + // Type info is encoded as type info ref. + + var index uint64 + + err = cbor.Unmarshal(rawTypeInfo[len(typeInfoRefTagHeadAndTagNumber):], &index) + if err != nil { + return nil, NewDecodingError(err) + } + + if index >= uint64(len(inlinedTypeInfo)) { + return nil, NewDecodingError(fmt.Errorf("failed to decode type info ref: expect index < %d, got %d", len(inlinedTypeInfo), index)) + } + + return inlinedTypeInfo[int(index)], nil + } + + // Decode type info as is. 
+ + dec := cbor.NewByteStreamDecoder(rawTypeInfo) + + return defaultTypeInfoDecoder(dec) + } +} + type ExtraData interface { isExtraData() bool + Type() TypeInfo Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error } @@ -74,6 +116,10 @@ func (c *compactMapExtraData) isExtraData() bool { return true } +func (c *compactMapExtraData) Type() TypeInfo { + return c.mapExtraData.TypeInfo +} + func (c *compactMapExtraData) Encode(enc *Encoder, encodeTypeInfo encodeTypeInfo) error { err := enc.CBOR.EncodeArrayHead(compactMapExtraDataLength) if err != nil { @@ -210,17 +256,10 @@ type compactMapTypeInfo struct { keys []ComparableStorable } -type extraDataInfo struct { - data ExtraData - typeInfoIndex int -} - type InlinedExtraData struct { - extraData []extraDataInfo // Used to encode deduplicated ExtraData in order - typeInfo []TypeInfo // Used to encode deduplicated TypeInfo in order + extraData []ExtraData // Used to encode deduplicated ExtraData in order compactMapTypeSet map[string]compactMapTypeInfo // Used to deduplicate compactMapExtraData by TypeInfo.Identifier() + sorted field names arrayExtraDataSet map[string]int // Used to deduplicate arrayExtraData by TypeInfo.Identifier() - typeInfoSet map[string]int // Used to deduplicate TypeInfo by TypeInfo.Identifier() } func newInlinedExtraData() *InlinedExtraData { @@ -230,12 +269,17 @@ func newInlinedExtraData() *InlinedExtraData { const inlinedExtraDataArrayCount = 2 +var typeInfoRefTagHeadAndTagNumber = []byte{0xd8, CBORTagTypeInfoRef} + // Encode encodes inlined extra data as 2-element array: // // +-----------------------+------------------------+ // | [+ inlined type info] | [+ inlined extra data] | // +-----------------------+------------------------+ func (ied *InlinedExtraData) Encode(enc *Encoder) error { + + typeInfos, typeInfoIndexes := findDuplicateTypeInfo(ied.extraData) + var err error err = enc.CBOR.EncodeArrayHead(inlinedExtraDataArrayCount) @@ -243,14 +287,14 @@ func (ied *InlinedExtraData) 
Encode(enc *Encoder) error { return NewEncodingError(err) } - // element 0: deduplicated array of type info - err = enc.CBOR.EncodeArrayHead(uint64(len(ied.typeInfo))) + // element 0: array of duplicate type info + err = enc.CBOR.EncodeArrayHead(uint64(len(typeInfos))) if err != nil { return NewEncodingError(err) } - // Encode inlined type info - for _, typeInfo := range ied.typeInfo { + // Encode type info + for _, typeInfo := range typeInfos { err = typeInfo.Encode(enc.CBOR) if err != nil { return NewEncodingError(err) @@ -267,7 +311,7 @@ func (ied *InlinedExtraData) Encode(enc *Encoder) error { for _, extraData := range ied.extraData { var tagNum uint64 - switch extraData.data.(type) { + switch extraData.(type) { case *ArrayExtraData: tagNum = CBORTagInlinedArrayExtraData @@ -286,14 +330,18 @@ func (ied *InlinedExtraData) Encode(enc *Encoder) error { return NewEncodingError(err) } - err = extraData.data.Encode(enc, func(enc *Encoder, typeInfo TypeInfo) error { - id := typeInfo.Identifier() - index, exist := ied.typeInfoSet[id] + err = extraData.Encode(enc, func(enc *Encoder, typeInfo TypeInfo) error { + index, exist := typeInfoIndexes[typeInfo.Identifier()] if !exist { - return NewEncodingError(fmt.Errorf("failed to encode type info ref %s (%T)", id, typeInfo)) + // typeInfo is not encoded separately, so encode typeInfo as is here. + err = typeInfo.Encode(enc.CBOR) + if err != nil { + return NewEncodingError(err) + } + return nil } - err := enc.CBOR.EncodeTagHead(CBORTagTypeInfoRef) + err := enc.CBOR.EncodeRawBytes(typeInfoRefTagHeadAndTagNumber) if err != nil { return NewEncodingError(err) } @@ -318,11 +366,65 @@ func (ied *InlinedExtraData) Encode(enc *Encoder) error { return nil } +func findDuplicateTypeInfo(extraData []ExtraData) ([]TypeInfo, map[string]int) { + if len(extraData) < 2 { + // No duplicate type info + return nil, nil + } + + // typeInfoSet is used to deduplicate TypeInfo. 
+ // typeInfoSet key: TypeInfo.Identifier() + // typeInfoSet value: indexes of extra data containing this type info + typeInfoSet := make(map[string][]int, len(extraData)) + + for i, data := range extraData { + typeID := data.Type().Identifier() + + indexes := typeInfoSet[typeID] + typeInfoSet[typeID] = append(indexes, i) + } + + if len(extraData) == len(typeInfoSet) { + // No duplicate type info + return nil, nil + } + + firstExtraDataIndexContainingDuplicateTypeInfo := make([]int, 0, len(typeInfoSet)) + for _, v := range typeInfoSet { + if len(v) > 1 { + firstExtraDataIndexContainingDuplicateTypeInfo = append(firstExtraDataIndexContainingDuplicateTypeInfo, v[0]) + } + } + + switch len(firstExtraDataIndexContainingDuplicateTypeInfo) { + case 1: + extraDataIndex := firstExtraDataIndexContainingDuplicateTypeInfo[0] + typeInfo := extraData[extraDataIndex].Type() + return []TypeInfo{typeInfo}, map[string]int{typeInfo.Identifier(): 0} + + default: + sort.Ints(firstExtraDataIndexContainingDuplicateTypeInfo) + + typeInfos := make([]TypeInfo, 0, len(firstExtraDataIndexContainingDuplicateTypeInfo)) + typeInfoIndexes := make(map[string]int) + + for _, extraDataIndex := range firstExtraDataIndexContainingDuplicateTypeInfo { + index := len(typeInfos) + + typeInfo := extraData[extraDataIndex].Type() + typeInfos = append(typeInfos, typeInfo) + typeInfoIndexes[typeInfo.Identifier()] = index + } + + return typeInfos, typeInfoIndexes + } +} + func newInlinedExtraDataFromData( data []byte, decMode cbor.DecMode, decodeStorable StorableDecoder, - decodeTypeInfo TypeInfoDecoder, + defaultDecodeTypeInfo TypeInfoDecoder, ) ([]ExtraData, []byte, error) { dec := decMode.NewByteStreamDecoder(data) @@ -336,43 +438,21 @@ func newInlinedExtraDataFromData( return nil, nil, NewDecodingError(fmt.Errorf("failed to decode inlined extra data: expect %d elements, got %d elements", inlinedExtraDataArrayCount, count)) } - // element 0: array of deduplicated type info + // element 0: array of duplicate 
type info typeInfoCount, err := dec.DecodeArrayHead() if err != nil { return nil, nil, NewDecodingError(err) } - if typeInfoCount == 0 { - return nil, nil, NewDecodingError(fmt.Errorf("failed to decode inlined extra data: expect at least one inlined type info")) - } - - inlinedTypeInfo := make([]TypeInfo, typeInfoCount) + inlinedTypeInfo := make([]TypeInfo, int(typeInfoCount)) for i := uint64(0); i < typeInfoCount; i++ { - inlinedTypeInfo[i], err = decodeTypeInfo(dec) + inlinedTypeInfo[i], err = defaultDecodeTypeInfo(dec) if err != nil { - return nil, nil, err + return nil, nil, wrapErrorfAsExternalErrorIfNeeded(err, "failed to decode typeInfo") } } - typeInfoRefDecoder := func(decoder *cbor.StreamDecoder) (TypeInfo, error) { - tagNum, err := decoder.DecodeTagNumber() - if err != nil { - return nil, err - } - if tagNum != CBORTagTypeInfoRef { - return nil, NewDecodingError(fmt.Errorf("failed to decode type info ref: expect tag number %d, got %d", CBORTagTypeInfoRef, tagNum)) - } - - index, err := decoder.DecodeUint64() - if err != nil { - return nil, NewDecodingError(err) - } - if index >= uint64(len(inlinedTypeInfo)) { - return nil, NewDecodingError(fmt.Errorf("failed to decode type info ref: expect index < %d, got %d", len(inlinedTypeInfo), index)) - } - - return inlinedTypeInfo[int(index)], nil - } + decodeTypeInfo := decodeTypeInfoRefIfNeeded(inlinedTypeInfo, defaultDecodeTypeInfo) // element 1: array of deduplicated extra data info extraDataCount, err := dec.DecodeArrayHead() @@ -393,19 +473,19 @@ func newInlinedExtraDataFromData( switch tagNum { case CBORTagInlinedArrayExtraData: - inlinedExtraData[i], err = newArrayExtraData(dec, typeInfoRefDecoder) + inlinedExtraData[i], err = newArrayExtraData(dec, decodeTypeInfo) if err != nil { return nil, nil, err } case CBORTagInlinedMapExtraData: - inlinedExtraData[i], err = newMapExtraData(dec, typeInfoRefDecoder) + inlinedExtraData[i], err = newMapExtraData(dec, decodeTypeInfo) if err != nil { return nil, nil, err } 
case CBORTagInlinedCompactMapExtraData: - inlinedExtraData[i], err = newCompactMapExtraData(dec, typeInfoRefDecoder, decodeStorable) + inlinedExtraData[i], err = newCompactMapExtraData(dec, decodeTypeInfo, decodeStorable) if err != nil { return nil, nil, err } @@ -418,25 +498,6 @@ func newInlinedExtraDataFromData( return inlinedExtraData, data[dec.NumBytesDecoded():], nil } -// addTypeInfo returns index of deduplicated type info. -func (ied *InlinedExtraData) addTypeInfo(typeInfo TypeInfo) int { - if ied.typeInfoSet == nil { - ied.typeInfoSet = make(map[string]int) - } - - id := typeInfo.Identifier() - index, exist := ied.typeInfoSet[id] - if exist { - return index - } - - index = len(ied.typeInfo) - ied.typeInfo = append(ied.typeInfo, typeInfo) - ied.typeInfoSet[id] = index - - return index -} - // addArrayExtraData returns index of deduplicated array extra data. // Array extra data is deduplicated by array type info ID because array // extra data only contains type info. @@ -451,10 +512,8 @@ func (ied *InlinedExtraData) addArrayExtraData(data *ArrayExtraData) int { return index } - typeInfoIndex := ied.addTypeInfo(data.TypeInfo) - index = len(ied.extraData) - ied.extraData = append(ied.extraData, extraDataInfo{data, typeInfoIndex}) + ied.extraData = append(ied.extraData, data) ied.arrayExtraDataSet[id] = index return index @@ -463,10 +522,8 @@ func (ied *InlinedExtraData) addArrayExtraData(data *ArrayExtraData) int { // addMapExtraData returns index of map extra data. // Map extra data is not deduplicated because it also contains count and seed. 
func (ied *InlinedExtraData) addMapExtraData(data *MapExtraData) int { - typeInfoIndex := ied.addTypeInfo(data.TypeInfo) - index := len(ied.extraData) - ied.extraData = append(ied.extraData, extraDataInfo{data, typeInfoIndex}) + ied.extraData = append(ied.extraData, data) return index } @@ -494,10 +551,8 @@ func (ied *InlinedExtraData) addCompactMapExtraData( keys: keys, } - typeInfoIndex := ied.addTypeInfo(data.TypeInfo) - index := len(ied.extraData) - ied.extraData = append(ied.extraData, extraDataInfo{compactMapData, typeInfoIndex}) + ied.extraData = append(ied.extraData, compactMapData) ied.compactMapTypeSet[id] = compactMapTypeInfo{ keys: keys,