From 31771d70762c5ca58b1c0c18babee660a7b5767d Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Wed, 30 Mar 2022 00:35:29 -0700 Subject: [PATCH] fix: address review feedback --- lib/buffer-writer.js | 142 ++++++++++++------------------------- test/test-buffer-writer.js | 41 ++++++++--- 2 files changed, 74 insertions(+), 109 deletions(-) diff --git a/lib/buffer-writer.js b/lib/buffer-writer.js index e74b717..0c99c52 100644 --- a/lib/buffer-writer.js +++ b/lib/buffer-writer.js @@ -3,13 +3,6 @@ import { Token, Type } from 'cborg' import { tokensToLength } from 'cborg/length' import * as CBOR from '@ipld/dag-cbor' -// Number of bytes required without any roots. -const EMPTY_HEADER_SIZE = 16 -// Number of bytes used for CIDv1 with sha256 digest -const DEFAULT_CID_SIZE = 36 -// Number of bytes added to tag CIDs -const CID_TAG_SIZE = 3 - /** * @typedef {import('../api').CID} CID * @typedef {import('../api').Block} Block @@ -36,7 +29,6 @@ class CarBufferWriter { * @type {CID[]} */ this.roots = [] - this.headerOffset = 0 this.headerSize = headerSize } @@ -75,25 +67,27 @@ class CarBufferWriter { * @param {{resize?:boolean}} [options] */ export const addRoot = (writer, root, { resize = false } = {}) => { - const { bytes, headerSize, byteOffset, roots, headerOffset } = writer - const byteLength = headerOffset + cidEncodeSize(root.bytes.byteLength) - const size = headerEncodeSize(writer.roots.length + 1, byteLength) - // If there is a space for the new root simply add it to the array - if (size <= headerSize) { - roots.push(root) - writer.headerOffset = byteLength - // If root does not fit in the header but there is space in buffer - } else if (size - headerSize + byteOffset < bytes.byteLength) { - if (resize) { - resizeHeader(writer, size) - roots.push(root) - writer.headerOffset = byteLength + const { bytes, headerSize, byteOffset, roots } = writer + writer.roots.push(root) + const size = headerLength(writer) + // If there is not enough space for the new root + if (size > headerSize) { + // Check if we root would fit if we were to resize the head. + if (size - headerSize + byteOffset < bytes.byteLength) { + // If resize is enabled resize head + if (resize) { + resizeHeader(writer, size) + // otherwise remove head and throw an error suggesting to resize + } else { + roots.pop() + throw new RangeError(`Header of size ${headerSize} has no capacity for new root ${root}. + However there is a space in the buffer and you could call addRoot(root, { resize: root }) to resize header to make a space for this root.`) + } + // If head would not fit even with resize pop new root and throw error } else { - throw new RangeError(`Header of size ${headerSize} has no capacity for new root ${root}. -However there is a space in the buffer and you could call addRoot(root, { resize: root }) to resize header to make a space for this root.`) + roots.pop() + throw new RangeError(`Buffer has no capacity for a new root ${root}`) } - } else { - throw new RangeError(`Buffer has no capacity for a new root ${root}`) } } @@ -187,71 +181,6 @@ const writeHeader = ({ bytes }, varint, header) => { bytes.set(header, varint.length) } -/** - * Attempts to estimate header size given number of roots it will have assuming - * they're CIDv1 in with sha256 digest. Optionally it takes number of bytes - * to be allocated for all the roots, in case different hashing or CID version - * is used. - * - * Note: Returned value is just an estimate which can be inaccurate where large - * number of CIDs is passed or if they are of various sizes. - * - * @param {number} count - Number of roots - * @param {number} [rootsByteLength] - Total byteLength of all roots - */ -export const estimateHeaderSize = ( - count, - rootsByteLength = count * DEFAULT_CID_SIZE -) => - count > 0 - ? headerEncodeSize(count, cidEncodeSize(Math.ceil(rootsByteLength / count)) * count) - : headerEncodeSize(0, 0) - -/** - * - * @param {number} count - * @param {number} rootsSize - * @returns - */ -const headerEncodeSize = (count, rootsSize) => { - const lengthSize = arrayLengthEncodeSize(count) - const headerSize = EMPTY_HEADER_SIZE + lengthSize + rootsSize - const varintSize = varint.encodingLength(headerSize) - return varintSize + headerSize -} - -/** - * - * @param {number} length - * @returns - */ -const arrayLengthEncodeSize = length => - length < 24 - ? 1 - : length < 256 - ? 2 - : length < 65536 - ? 3 - : CBOR.encode(length).length - -/** - * @param {number} byteLength - * @returns {number} - */ -const cidEncodeSize = byteLength => - byteLength + arrayLengthEncodeSize(byteLength) + CID_TAG_SIZE - -/** - * @param {CID[]} cids - */ -const totalByteLength = (cids) => { - let total = 0 - for (const cid of cids) { - total += cidEncodeSize(cid.bytes.byteLength) - } - return total -} - const headerPreludeTokens = [ new Token(Type.map, 2), new Token(Type.string, 'version'), @@ -259,7 +188,11 @@ const headerPreludeTokens = [ new Token(Type.string, 'roots') ] +const CID_TAG = new Token(Type.tag, 42) + /** + * Calculates header size given the array of byteLength for roots. + * * @param {number[]} rootLengths * @returns {number} */ @@ -267,24 +200,37 @@ export const calculateHeaderLength = (rootLengths) => { const tokens = [...headerPreludeTokens] tokens.push(new Token(Type.array, rootLengths.length)) for (const rootLength of rootLengths) { - tokens.push(new Token(Type.tag, 42)) + tokens.push(CID_TAG) tokens.push(new Token(Type.bytes, { length: rootLength + 1 })) } const length = tokensToLength(tokens) // no options needed here because we have simple tokens return varint.encodingLength(length) + length } +/** + * @param {{roots:CID[]}} options + */ +export const headerLength = ({ roots }) => + calculateHeaderLength(roots.map(cid => cid.bytes.byteLength)) + +/** + * @param {number} rootCount + * @param {number} [rootByteLength] + * @returns {number} + */ +export const estimateHeaderLength = (rootCount, rootByteLength = 36) => + calculateHeaderLength(new Array(rootCount).fill(rootByteLength)) + /** * Creates synchronous CAR writer that can be used to encode blocks into a given * buffer. Optionally you could pass `byteOffset` and `byteLength` to specify a * range inside buffer to write into. If car file is going to have `roots` you - * need to either pass them under `options.roots` or provide `options.headerCapacity` - * to allocate required space for the header (You can use `estimateHeaderCapacity` - * function to get an estimate). It is also possible to provide both `roots` - * and `headerCapacity` to allocate space for the roots that may not be known - * ahead of time. + * need to either pass them under `options.roots` (from which header size will + * be calculated) or provide `options.headerSize` to allocate required space + * in the buffer. You may also provide known `roots` and `headerSize` to + * allocate space for the roots that may not be known ahead of time. * - * Note: Incorrect header estimate may lead to copying bytes inside a buffer + * Note: Incorrect `headerSize` may lead to copying bytes inside a buffer * which will have a negative impact on performance. * * @param {ArrayBuffer} buffer @@ -297,7 +243,7 @@ export const createWriter = ( roots = [], byteOffset = 0, byteLength = buffer.byteLength, - headerSize = headerEncodeSize(roots.length, totalByteLength(roots)) + headerSize = headerLength({ roots }) } = {} ) => { const bytes = new Uint8Array(buffer, byteOffset, byteLength) diff --git a/test/test-buffer-writer.js b/test/test-buffer-writer.js index 14f47f0..24c6424 100644 --- a/test/test-buffer-writer.js +++ b/test/test-buffer-writer.js @@ -13,13 +13,17 @@ import * as Block from 'multiformats/block' describe('CarBufferWriter', () => { const cid = CID.parse('bafkreifuosuzujyf4i6psbneqtwg2fhplc2wxptc5euspa2gn3bwhnihfu') - describe('estimateHeaderSize', async () => { + describe('calculateHeaderLength', async () => { for (const count of [0, 1, 10, 18, 24, 48, 124, 255, 258, 65536 - 1, 65536]) { - it(`estimateHeaderCapacity(${count})`, () => { + it(`calculateHeaderLength(new Array(${count}).fill(36))`, () => { const roots = new Array(count).fill(cid) - assert.deepEqual(CarBufferWriter.estimateHeaderSize(count), createHeader(roots).byteLength) + const sizes = new Array(count).fill(cid.bytes.byteLength) + assert.deepEqual( + CarBufferWriter.calculateHeaderLength(sizes), + createHeader(roots).byteLength + ) }) - it(`calculateHeaderLength(${count})`, () => { + it(`calculateHeaderLength(new Array(${count}).fill(36))`, () => { const roots = new Array(count).fill(cid) const rootLengths = roots.map((c) => c.bytes.byteLength) assert.deepEqual(CarBufferWriter.calculateHeaderLength(rootLengths), createHeader(roots).byteLength) @@ -27,18 +31,33 @@ describe('CarBufferWriter', () => { } it('estimate on large CIDs', () => { const largeCID = CID.parse(`bafkqbbac${'a'.repeat(416)}`) - assert.ok(CarBufferWriter.estimateHeaderSize(2, cid.bytes.byteLength + largeCID.bytes.byteLength) >= createHeader([cid, largeCID]).byteLength) + assert.equal( + CarBufferWriter.calculateHeaderLength([ + cid.bytes.byteLength, + largeCID.bytes.byteLength + ]), + createHeader([ + cid, + largeCID + ]).byteLength + ) }) it('estimate on large CIDs 2', () => { const largeCID = CID.createV1(Raw.code, identity.digest(new Uint8Array(512).fill(1))) - assert.ok(CarBufferWriter.estimateHeaderSize(2, cid.bytes.byteLength + largeCID.bytes.byteLength) >= createHeader([cid, largeCID]).byteLength) + assert.equal( + CarBufferWriter.calculateHeaderLength([ + cid.bytes.byteLength, + largeCID.bytes.byteLength + ]), + createHeader([cid, largeCID]).byteLength + ) }) }) describe('writer', () => { it('estimate header and write blocks', async () => { - const headerSize = CarBufferWriter.estimateHeaderSize(1) + const headerSize = CarBufferWriter.estimateHeaderLength(1) const dataSize = 256 const buffer = new ArrayBuffer(headerSize + dataSize) const writer = CarBufferWriter.createWriter(buffer, { headerSize }) @@ -66,7 +85,7 @@ describe('CarBufferWriter', () => { }) it('overestimate header', async () => { - const headerSize = CarBufferWriter.estimateHeaderSize(2) + const headerSize = CarBufferWriter.estimateHeaderLength(2) const dataSize = 256 const buffer = new ArrayBuffer(headerSize + dataSize) const writer = CarBufferWriter.createWriter(buffer, { headerSize }) @@ -95,7 +114,7 @@ describe('CarBufferWriter', () => { }) it('underestimate header', async () => { - const headerSize = CarBufferWriter.estimateHeaderSize(2) + const headerSize = CarBufferWriter.estimateHeaderLength(2) const dataSize = 300 const buffer = new ArrayBuffer(headerSize + dataSize) const writer = CarBufferWriter.createWriter(buffer, { headerSize }) @@ -126,7 +145,7 @@ describe('CarBufferWriter', () => { }) it('has no space for the root', async () => { - const headerSize = CarBufferWriter.estimateHeaderSize(1) + const headerSize = CarBufferWriter.estimateHeaderLength(2) const dataSize = 100 const buffer = new ArrayBuffer(headerSize + dataSize) const writer = CarBufferWriter.createWriter(buffer, { headerSize }) @@ -156,7 +175,7 @@ describe('CarBufferWriter', () => { }) it('has no space for the block', async () => { - const headerSize = CarBufferWriter.estimateHeaderSize(1) + const headerSize = CarBufferWriter.estimateHeaderLength(1) const dataSize = 58 const buffer = new ArrayBuffer(headerSize + dataSize) const writer = CarBufferWriter.createWriter(buffer, { headerSize })