diff --git a/packages/upload-client/package.json b/packages/upload-client/package.json index 44a5ee183..8a8883ac2 100644 --- a/packages/upload-client/package.json +++ b/packages/upload-client/package.json @@ -65,7 +65,7 @@ "dependencies": { "@ipld/car": "^5.0.3", "@ipld/dag-ucan": "^3.2.0", - "@ipld/unixfs": "^2.0.1", + "@ipld/unixfs": "^2.1.1", "@ucanto/client": "^5.1.0", "@ucanto/interface": "^6.0.0", "@ucanto/transport": "^5.1.0", diff --git a/packages/upload-client/src/unixfs.js b/packages/upload-client/src/unixfs.js index 07e3c8f1a..b8793c744 100644 --- a/packages/upload-client/src/unixfs.js +++ b/packages/upload-client/src/unixfs.js @@ -1,12 +1,16 @@ import * as UnixFS from '@ipld/unixfs' import * as raw from 'multiformats/codecs/raw' +import { withMaxChunkSize } from '@ipld/unixfs/file/chunker/fixed' +import { withWidth } from '@ipld/unixfs/file/layout/balanced' +const SHARD_THRESHOLD = 1000 // shard directory after > 1,000 items const queuingStrategy = UnixFS.withCapacity() -// TODO: configure chunk size and max children https://github.com/ipld/js-unixfs/issues/36 const settings = UnixFS.configure({ fileChunkEncoder: raw, smallFileEncoder: raw, + chunker: withMaxChunkSize(1024 * 1024), + fileLayout: withWidth(1024), }) /** @@ -64,7 +68,10 @@ class UnixFSDirectoryBuilder { /** @param {import('@ipld/unixfs').View} writer */ async finalize(writer) { - const dirWriter = UnixFS.createDirectoryWriter(writer) + const dirWriter = + this.entries.size <= SHARD_THRESHOLD + ? 
UnixFS.createDirectoryWriter(writer) + : UnixFS.createShardedDirectoryWriter(writer) for (const [name, entry] of this.entries) { const link = await entry.finalize(writer) dirWriter.set(name, link) diff --git a/packages/upload-client/test/index.test.js b/packages/upload-client/test/index.test.js index 024c1ee58..3c298f5e9 100644 --- a/packages/upload-client/test/index.test.js +++ b/packages/upload-client/test/index.test.js @@ -113,7 +113,7 @@ describe('uploadFile', () => { it('allows custom shard size to be set', async () => { const space = await Signer.generate() const agent = await Signer.generate() // The "user" that will ask the service to accept the upload - const file = new Blob([await randomBytes(500_000)]) + const file = new Blob([await randomBytes(1024 * 1024 * 5)]) /** @type {import('../src/types').CARLink[]} */ const carCIDs = [] @@ -174,12 +174,12 @@ describe('uploadFile', () => { file, { connection, - shardSize: 400_000, // should end up with 2 CAR files + shardSize: 1024 * 1024 * 2, // should end up with 3 CAR files onShardStored: (meta) => carCIDs.push(meta.cid), } ) - assert.equal(carCIDs.length, 2) + assert.equal(carCIDs.length, 3) }) }) diff --git a/packages/upload-client/test/sharding.test.js b/packages/upload-client/test/sharding.test.js index b0493d7e6..6156a95ee 100644 --- a/packages/upload-client/test/sharding.test.js +++ b/packages/upload-client/test/sharding.test.js @@ -15,8 +15,8 @@ import { mockService } from './helpers/mocks.js' describe('ShardingStream', () => { it('creates shards from blocks', async () => { - const file = new Blob([await randomBytes(1024 * 1024)]) - const shardSize = 512 * 1024 + const file = new Blob([await randomBytes(1024 * 1024 * 5)]) + const shardSize = 1024 * 1024 * 2 /** @type {import('../src/types').CARFile[]} */ const shards = [] diff --git a/packages/upload-client/test/unixfs.test.js b/packages/upload-client/test/unixfs.test.js index 7f197c9eb..8465e1b91 100644 --- a/packages/upload-client/test/unixfs.test.js 
+++ b/packages/upload-client/test/unixfs.test.js @@ -1,4 +1,5 @@ import assert from 'assert' +import { decode, NodeType } from '@ipld/unixfs' import { exporter } from 'ipfs-unixfs-exporter' import { MemoryBlockstore } from 'blockstore-core/memory' import * as raw from 'multiformats/codecs/raw' @@ -67,6 +68,31 @@ describe('UnixFS', () => { expectedPaths.forEach((p) => assert(actualPaths.includes(p))) }) + it('encodes a sharded directory', async () => { + const files = [] + for (let i = 0; i < 1001; i++) { + files.push(new File([`data${i}`], `file${i}.txt`)) + } + + const { cid, blocks } = await encodeDirectory(files) + const blockstore = await blocksToBlockstore(blocks) + const dirEntry = await exporter(cid.toString(), blockstore) + assert.equal(dirEntry.type, 'directory') + + const expectedPaths = files.map((f) => path.join(cid.toString(), f.name)) + // @ts-expect-error + const entries = await collectDir(dirEntry) + const actualPaths = entries.map((e) => e.path) + + expectedPaths.forEach((p) => assert(actualPaths.includes(p))) + + // check root node is a HAMT sharded directory + // @ts-expect-error + const bytes = await blockstore.get(cid) + const node = decode(bytes) + assert.equal(node.type, NodeType.HAMTShard) + }) + it('throws then treating a file as a directory', () => assert.rejects( encodeDirectory([ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 053c62024..4fcc00e4c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -328,7 +328,7 @@ importers: specifiers: '@ipld/car': ^5.0.3 '@ipld/dag-ucan': ^3.2.0 - '@ipld/unixfs': ^2.0.1 + '@ipld/unixfs': ^2.1.1 '@types/assert': ^1.5.6 '@types/mocha': ^10.0.1 '@ucanto/client': ^5.1.0 @@ -353,7 +353,7 @@ importers: dependencies: '@ipld/car': 5.1.0 '@ipld/dag-ucan': 3.2.0 - '@ipld/unixfs': 2.0.1 + '@ipld/unixfs': 2.1.1 '@ucanto/client': 5.1.0 '@ucanto/interface': 6.0.0 '@ucanto/transport': 5.1.0 @@ -2512,8 +2512,8 @@ packages: cborg: 1.10.0 multiformats: 11.0.1 - /@ipld/dag-pb/4.0.0: - resolution: {integrity: 
sha512-8FB/qTlNowCiszL9Sek8xH6xIQxIioXuzZ5B1jVPknQMVkd08nZUHzDjrn1Y6MqJ5PrXWLrBwNghGMWPPpvNVw==} + /@ipld/dag-pb/4.0.2: + resolution: {integrity: sha512-me9oEPb7UNPWSplUFCXyxnQE3/WlsjOljqO2RZN44XOmGkBY0/WVklbXorVE1eiv0Rt3p6dBS2x36Rq8A0Am8A==} engines: {node: '>=16.0.0', npm: '>=7.0.0'} dependencies: multiformats: 11.0.1 @@ -2525,10 +2525,12 @@ packages: '@ipld/dag-json': 10.0.1 multiformats: 11.0.1 - /@ipld/unixfs/2.0.1: - resolution: {integrity: sha512-W3LD67tLrIGpCVYFN6N/x6bL3o03zmsfd7jPAD1aXfGXaQWWa95qXPwc6PMVRTttxha/bHMKQiG2ZeFCqp83Ew==} + /@ipld/unixfs/2.1.1: + resolution: {integrity: sha512-g3gr/3XvfQs4x2VFjlICae09ul5fbWCKRInN6Vgeot2+GH0h/krr3PqZCIo4dy4Ou2mQOsIddxUvG8UZ4p9SbQ==} dependencies: - '@ipld/dag-pb': 4.0.0 + '@ipld/dag-pb': 4.0.2 + '@multiformats/murmur3': 2.1.3 + '@perma/map': 1.0.2 '@web-std/stream': 1.0.1 actor: 2.3.1 multiformats: 11.0.1 @@ -2813,6 +2815,14 @@ packages: murmurhash3js-revisited: 3.0.0 dev: true + /@multiformats/murmur3/2.1.3: + resolution: {integrity: sha512-YvLK1IrLnRckPsvXhOkZjaIGNonsEdD1dL3NPSaLilV/WjVYeBgnNZXTUsaPzFXGrIFM7motx+yCmmqzXO6gtQ==} + engines: {node: '>=16.0.0', npm: '>=7.0.0'} + dependencies: + multiformats: 11.0.1 + murmurhash3js-revisited: 3.0.0 + dev: false + /@noble/ed25519/1.7.3: resolution: {integrity: sha512-iR8GBkDt0Q3GyaVcIu7mSsVIqnFbkbRzGLWlvhwunacoLwt4J3swfKhfaM6rN6WY+TBGoYT1GtT1mIh2/jGbRQ==} @@ -2841,6 +2851,12 @@ packages: fastq: 1.15.0 dev: true + /@perma/map/1.0.2: + resolution: {integrity: sha512-hujwGOY6yTYnpf5YAtpD5MJAI1kcsVPqyN0lxG8Sampf/InO3jmX/MlJCHCGFPpPqB5JyO5WNnL+tUs1Umqe0A==} + dependencies: + murmurhash3js-revisited: 3.0.0 + dev: false + /@phenomnomnominal/tsquery/4.2.0_typescript@4.9.5: resolution: {integrity: sha512-hR2U3uVcrrdkuG30ItQ+uFDs4ncZAybxWG0OjTE8ptPzVoU7GVeXpy+vMU8zX9EbmjGeITPw/su5HjYQyAH8bA==} peerDependencies: @@ -7830,7 +7846,7 @@ packages: engines: {node: '>=16.0.0', npm: '>=7.0.0'} dependencies: '@ipld/dag-cbor': 9.0.0 - '@ipld/dag-pb': 4.0.0 + '@ipld/dag-pb': 4.0.2 
'@multiformats/murmur3': 2.1.2 err-code: 3.0.1 hamt-sharding: 3.0.2 @@ -9093,7 +9109,6 @@ packages: /murmurhash3js-revisited/3.0.0: resolution: {integrity: sha512-/sF3ee6zvScXMb1XFJ8gDsSnY+X8PbOyjIuBhtgis10W2Jx4ZjIhikUCIF9c4gpJxVnQIsPAFrSwTCuAjicP6g==} engines: {node: '>=8.0.0'} - dev: true /mustache/4.2.0: resolution: {integrity: sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==}