Skip to content
This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

Commit

Permalink
feat(core): add option to specify chunking algorithm
Browse files Browse the repository at this point in the history
This allows the chunking algorithm and its options to be specified when adding files.
The chunker and its options are specified in the same way as in go-ipfs, and the following formats are supported:
default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}
This is required to achieve parity with go-ipfs.

Fixes #1283

License: MIT
Signed-off-by: Dan Ordille <dordille@gmail.com>
  • Loading branch information
dordille committed Jul 26, 2018
1 parent f4344b0 commit 562e302
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/cli/commands/files/add.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ module.exports = {
default: false,
describe: 'Only chunk and hash, do not write'
},
chunker: {
default: 'default',
describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]'
},
'enable-sharding-experiment': {
type: 'boolean',
default: false
Expand Down Expand Up @@ -194,7 +198,8 @@ module.exports = {
onlyHash: argv.onlyHash,
hashAlg: argv.hash,
wrapWithDirectory: argv.wrapWithDirectory,
pin: argv.pin
pin: argv.pin,
chunker: argv.chunker
}

// Temporary restriction on raw-leaves:
Expand Down
4 changes: 3 additions & 1 deletion src/core/components/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const Duplex = require('readable-stream').Duplex
const OtherBuffer = require('buffer').Buffer
const CID = require('cids')
const toB58String = require('multihashes').toB58String
const parseChunkerString = require('../utils').parseChunkerString

const WRAPPER = 'wrapper/'

Expand Down Expand Up @@ -134,11 +135,12 @@ class AddHelper extends Duplex {

module.exports = function files (self) {
function _addPullStream (options) {
const chunkerOptions = parseChunkerString(options.chunker)
const opts = Object.assign({}, {
shardSplitThreshold: self._options.EXPERIMENTAL.sharding
? 1000
: Infinity
}, options)
}, options, chunkerOptions)

if (opts.hashAlg && opts.cidVersion !== 1) {
opts.cidVersion = 1
Expand Down
88 changes: 88 additions & 0 deletions src/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,93 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) {
}, callback)
})

/**
 * Parses a chunker string into the options used by DAGBuilder in ipfs-unixfs-engine
 *
 * @param {String} chunker Chunker algorithm, supported formats:
 *                    "default" (also "" or any other falsy value)
 *                    "size-{size}"
 *                    "rabin"
 *                    "rabin-{avg}"
 *                    "rabin-{min}-{avg}-{max}"
 *
 * @return {Object} Chunker options for DAGBuilder. `chunker` is either 'fixed'
 *                  or 'rabin'; `chunkerOptions` is present for every format
 *                  except the default one.
 * @throws {Error} If {size} is not a decimal integer, or if the string matches
 *                 none of the supported formats
 */
function parseChunkerString (chunker) {
  if (!chunker || chunker === 'default') {
    return {
      chunker: 'fixed'
    }
  }

  if (chunker.startsWith('size-')) {
    // Only the first segment after "size-" is read; further "-" separated
    // segments are ignored.
    const sizeStr = chunker.split('-')[1]
    // parseInt on its own accepts trailing garbage ("512kb" -> 512) and hex
    // prefixes, so make sure the segment is made of decimal digits only.
    if (!/^\d+$/.test(sizeStr)) {
      throw new Error('Parameter size must be an integer')
    }
    return {
      chunker: 'fixed',
      chunkerOptions: {
        maxChunkSize: parseInt(sizeStr, 10)
      }
    }
  }

  if (chunker.startsWith('rabin')) {
    return {
      chunker: 'rabin',
      chunkerOptions: parseRabinString(chunker)
    }
  }

  throw new Error(`unrecognized chunker option: ${chunker}`)
}

/**
 * Parses a rabin chunker string
 *
 * @param {String} chunker Chunker algorithm, supported formats:
 *                    "rabin"
 *                    "rabin-{avg}"
 *                    "rabin-{min}-{avg}-{max}"
 *
 * @return {Object} rabin chunker options: always `avgChunkSize`, plus
 *                  `minChunkSize` and `maxChunkSize` for the four-part format
 * @throws {Error} If a size is not an integer or the number of "-" separated
 *                 parts is not 1, 2 or 4
 */
function parseRabinString (chunker) {
  const options = {}
  const parts = chunker.split('-')
  switch (parts.length) {
    case 1:
      // Bare "rabin": use an average chunk size of 256 KiB
      options.avgChunkSize = 262144
      break
    case 2:
      // parseInt on its own accepts trailing garbage ("512kb" -> 512) and hex
      // prefixes, so make sure the segment is made of decimal digits only.
      if (!/^\d+$/.test(parts[1])) {
        throw new Error('Parameter avg must be an integer')
      }
      options.avgChunkSize = parseInt(parts[1], 10)
      break
    case 4:
      // Each segment may optionally carry a "label:" prefix, which parseSub checks
      options.minChunkSize = parseSub(parts[1].split(':'), 'min')
      options.avgChunkSize = parseSub(parts[2].split(':'), 'avg')
      options.maxChunkSize = parseSub(parts[3].split(':'), 'max')
      break
    default:
      throw new Error('incorrect format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]")')
  }

  return options
}

/**
 * Parses one size segment of a "rabin-{min}-{avg}-{max}" chunker string
 *
 * @param {Array<String>} sub Segment already split on ":", either ["{size}"]
 *                            or ["{label}", "{size}"]
 * @param {String} name Label the segment must carry when one is present,
 *                      also used in the error message
 *
 * @return {Number} The parsed size
 * @throws {Error} If a label is present and differs from `name`, or if the
 *                 size is not a decimal integer
 */
function parseSub (sub, name) {
  if (sub.length > 1 && sub[0] !== name) {
    throw new Error('Parameter order must be min:avg:max')
  }

  // The size is always the last element, whether or not a label precedes it
  const sizeStr = sub[sub.length - 1]
  // parseInt on its own accepts trailing garbage ("42kb" -> 42) and hex
  // prefixes, so make sure the value is made of decimal digits only.
  if (!/^\d+$/.test(sizeStr)) {
    throw new Error(`Parameter ${name} must be an integer`)
  }

  return parseInt(sizeStr, 10)
}

exports.parseIpfsPath = parseIpfsPath
exports.resolvePath = resolvePath
exports.parseChunkerString = parseChunkerString
3 changes: 2 additions & 1 deletion src/http/api/resources/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ exports.add = {
onlyHash: request.query['only-hash'],
hashAlg: request.query['hash'],
wrapWithDirectory: request.query['wrap-with-directory'],
pin: request.query.pin
pin: request.query.pin,
chunker: request.query['chunker']
}

const aborter = abortable()
Expand Down
47 changes: 47 additions & 0 deletions test/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,51 @@ describe('utils', () => {
})
})
})

// Covers every chunker string format accepted by utils.parseChunkerString
describe('parseChunkerString', () => {
  it('handles an empty string', () => {
    const parsed = utils.parseChunkerString('')
    expect(parsed).to.have.property('chunker', 'fixed')
  })

  it('handles a null chunker string', () => {
    const parsed = utils.parseChunkerString(null)
    expect(parsed).to.have.property('chunker', 'fixed')
  })

  it('parses a fixed size string', () => {
    const parsed = utils.parseChunkerString('size-512')
    expect(parsed).to.have.property('chunker', 'fixed')
    expect(parsed)
      .to.have.property('chunkerOptions')
      .that.has.property('maxChunkSize', 512)
  })

  it('parses a rabin string without size', () => {
    const parsed = utils.parseChunkerString('rabin')
    expect(parsed).to.have.property('chunker', 'rabin')
    // Only the presence of the average size is checked here, not its value
    expect(parsed)
      .to.have.property('chunkerOptions')
      .that.has.property('avgChunkSize')
  })

  it('parses a rabin string with only avg size', () => {
    const parsed = utils.parseChunkerString('rabin-512')
    expect(parsed).to.have.property('chunker', 'rabin')
    expect(parsed)
      .to.have.property('chunkerOptions')
      .that.has.property('avgChunkSize', 512)
  })

  it('parses a rabin string with min, avg, and max', () => {
    const parsed = utils.parseChunkerString('rabin-42-92-184')
    expect(parsed).to.have.property('chunker', 'rabin')
    expect(parsed).to.have.property('chunkerOptions')

    const rabinOptions = parsed.chunkerOptions
    expect(rabinOptions).to.have.property('minChunkSize', 42)
    expect(rabinOptions).to.have.property('avgChunkSize', 92)
    expect(rabinOptions).to.have.property('maxChunkSize', 184)
  })
})
})

0 comments on commit 562e302

Please sign in to comment.