Skip to content

Commit

Permalink
feat: add HAMT sharded directories support (#536)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alan Shaw authored Mar 14, 2023
1 parent 3868d97 commit 346b050
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 17 deletions.
2 changes: 1 addition & 1 deletion packages/upload-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
"dependencies": {
"@ipld/car": "^5.0.3",
"@ipld/dag-ucan": "^3.2.0",
"@ipld/unixfs": "^2.0.1",
"@ipld/unixfs": "^2.1.1",
"@ucanto/client": "^5.1.0",
"@ucanto/interface": "^6.0.0",
"@ucanto/transport": "^5.1.0",
Expand Down
11 changes: 9 additions & 2 deletions packages/upload-client/src/unixfs.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import * as UnixFS from '@ipld/unixfs'
import * as raw from 'multiformats/codecs/raw'
import { withMaxChunkSize } from '@ipld/unixfs/file/chunker/fixed'
import { withWidth } from '@ipld/unixfs/file/layout/balanced'

// Entry-count cutoff for directory sharding: a directory holding at most this
// many entries is written with the plain directory writer; a directory with
// more entries is written with the sharded directory writer instead.
const SHARD_THRESHOLD = 1000 // shard directory after > 1,000 items
// Queuing strategy obtained from UnixFS.withCapacity() called with no
// arguments (presumably the library's default capacity — confirm against
// @ipld/unixfs).
const queuingStrategy = UnixFS.withCapacity()

// TODO: configure chunk size and max children https://github.com/ipld/js-unixfs/issues/36
// UnixFS encoder settings, built once at module load via UnixFS.configure.
const settings = UnixFS.configure({
// Both file chunks and small files are encoded with the `raw` codec imported
// from multiformats.
fileChunkEncoder: raw,
smallFileEncoder: raw,
// Fixed chunker capped at 1024 * 1024 (presumably bytes, i.e. 1 MiB per
// chunk — confirm against the @ipld/unixfs chunker docs).
chunker: withMaxChunkSize(1024 * 1024),
// Balanced file layout with a width of 1024 (presumably the maximum number
// of children per node — confirm).
fileLayout: withWidth(1024),
})

/**
Expand Down Expand Up @@ -64,7 +68,10 @@ class UnixFSDirectoryBuilder {

/** @param {import('@ipld/unixfs').View} writer */
async finalize(writer) {
const dirWriter = UnixFS.createDirectoryWriter(writer)
const dirWriter =
this.entries.size <= SHARD_THRESHOLD
? UnixFS.createDirectoryWriter(writer)
: UnixFS.createShardedDirectoryWriter(writer)
for (const [name, entry] of this.entries) {
const link = await entry.finalize(writer)
dirWriter.set(name, link)
Expand Down
6 changes: 3 additions & 3 deletions packages/upload-client/test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ describe('uploadFile', () => {
it('allows custom shard size to be set', async () => {
const space = await Signer.generate()
const agent = await Signer.generate() // The "user" that will ask the service to accept the upload
const file = new Blob([await randomBytes(500_000)])
const file = new Blob([await randomBytes(1024 * 1024 * 5)])
/** @type {import('../src/types').CARLink[]} */
const carCIDs = []

Expand Down Expand Up @@ -174,12 +174,12 @@ describe('uploadFile', () => {
file,
{
connection,
shardSize: 400_000, // should end up with 2 CAR files
shardSize: 1024 * 1024 * 2, // should end up with 2 CAR files
onShardStored: (meta) => carCIDs.push(meta.cid),
}
)

assert.equal(carCIDs.length, 2)
assert.equal(carCIDs.length, 3)
})
})

Expand Down
4 changes: 2 additions & 2 deletions packages/upload-client/test/sharding.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ import { mockService } from './helpers/mocks.js'

describe('ShardingStream', () => {
it('creates shards from blocks', async () => {
const file = new Blob([await randomBytes(1024 * 1024)])
const shardSize = 512 * 1024
const file = new Blob([await randomBytes(1024 * 1024 * 5)])
const shardSize = 1024 * 1024 * 2

/** @type {import('../src/types').CARFile[]} */
const shards = []
Expand Down
26 changes: 26 additions & 0 deletions packages/upload-client/test/unixfs.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import assert from 'assert'
import { decode, NodeType } from '@ipld/unixfs'
import { exporter } from 'ipfs-unixfs-exporter'
import { MemoryBlockstore } from 'blockstore-core/memory'
import * as raw from 'multiformats/codecs/raw'
Expand Down Expand Up @@ -67,6 +68,31 @@ describe('UnixFS', () => {
expectedPaths.forEach((p) => assert(actualPaths.includes(p)))
})

it('encodes a sharded directory', async () => {
  // 1001 files: one more entry than the 1,000-item threshold at which the
  // directory builder switches to a sharded directory writer.
  const files = Array.from(
    { length: 1001 },
    (_, index) => new File([`data${index}`], `file${index}.txt`)
  )

  const { cid, blocks } = await encodeDirectory(files)
  const blockstore = await blocksToBlockstore(blocks)
  const rootPath = cid.toString()

  // The exporter must still present the root as an ordinary directory.
  const dirEntry = await exporter(rootPath, blockstore)
  assert.equal(dirEntry.type, 'directory')

  // @ts-expect-error
  const entries = await collectDir(dirEntry)
  const actualPaths = entries.map((entry) => entry.path)

  // Every input file has to be reachable beneath the root CID.
  for (const file of files) {
    assert(actualPaths.includes(path.join(rootPath, file.name)))
  }

  // check root node is a HAMT sharded directory
  // @ts-expect-error
  const bytes = await blockstore.get(cid)
  const rootNode = decode(bytes)
  assert.equal(rootNode.type, NodeType.HAMTShard)
})

it('throws then treating a file as a directory', () =>
assert.rejects(
encodeDirectory([
Expand Down
33 changes: 24 additions & 9 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 346b050

Please sign in to comment.