From a47c9ede718dd8265a991c38afc684bb43e93d98 Mon Sep 17 00:00:00 2001
From: Alex Potsides <alex@achingbrain.net>
Date: Fri, 22 Nov 2019 09:35:33 -0600
Subject: [PATCH] feat: support storing metadata in unixfs nodes (#39)

* feat: support storing metadata in unixfs nodes

Adds `mtime` and `mode` properties to `{path, content}` import entries

* chore: remove gh url

* chore: upgrade node

* chore: update deps

* fix: add metadata to directories too

* fix: add metadata to imported directories
---
 .travis.yml                   |   2 +-
 README.md                     |  27 ++--
 package.json                  |  10 +-
 src/dag-builder/dir.js        |   9 ++
 src/dag-builder/file/index.js |  21 ++-
 src/dir-flat.js               |  10 ++
 src/dir-sharded.js            |  14 +-
 src/flat-to-shard.js          |   8 +-
 src/tree-builder.js           |  36 ++---
 test/benchmark.spec.js        |   2 +-
 test/importer.spec.js         | 240 ++++++++++++++++++++++++++++++++++
 11 files changed, 331 insertions(+), 48 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d0cf281..be3ad28 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ stages:
   - cov
 
 node_js:
-  - '10'
+  - '12'
 
 os:
   - linux
diff --git a/README.md b/README.md
index 101f179..aab39d9 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# ipfs-unixfs-importer
+# ipfs-unixfs-importer <!-- omit in toc -->
 
 [![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
 [![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
@@ -13,22 +13,19 @@
 
 > JavaScript implementation of the layout and chunking mechanisms used by IPFS to handle Files
 
-## Lead Maintainer
+## Lead Maintainer <!-- omit in toc -->
 
 [Alex Potsides](https://github.com/achingbrain)
 
-## Table of Contents
+## Table of Contents <!-- omit in toc -->
 
-- [ipfs-unixfs-importer](#ipfs-unixfs-importer)
-  - [Lead Maintainer](#lead-maintainer)
-  - [Table of Contents](#table-of-contents)
-  - [Install](#install)
-  - [Usage](#usage)
-    - [Example](#example)
-      - [API](#api)
-      - [const import = importer(source, ipld [, options])](#const-import--importersource-ipld--options)
-  - [Contribute](#contribute)
-  - [License](#license)
+- [Install](#install)
+- [Usage](#usage)
+  - [Example](#example)
+    - [API](#api)
+    - [const import = importer(source, ipld [, options])](#const-import--importersource-ipld--options)
+- [Contribute](#contribute)
+- [License](#license)
 
 ## Install
 
@@ -108,7 +105,9 @@ The `import` function returns an async iterator takes a source async iterator th
 ```js
 {
   path: 'a name',
-  content: (Buffer or iterator emitting Buffers)
+  content: (Buffer or iterator emitting Buffers),
+  mtime: (Number representing seconds since (positive) or before (negative) the Unix Epoch),
+  mode: (Number representing ugo-rwx, setuid, setgid and sticky bit)
 }
 ```
 
diff --git a/package.json b/package.json
index 43c5a4d..a724e3e 100644
--- a/package.json
+++ b/package.json
@@ -44,7 +44,7 @@
     "cids": "~0.7.1",
     "detect-node": "^2.0.4",
     "dirty-chai": "^2.0.1",
-    "ipfs-unixfs-exporter": "~0.37.0",
+    "ipfs-unixfs-exporter": "^0.39.0",
     "ipld": "^0.25.0",
     "ipld-in-memory": "^3.0.0",
     "multihashes": "~0.4.14",
@@ -55,16 +55,16 @@
     "async-iterator-all": "^1.0.0",
     "async-iterator-batch": "~0.0.1",
     "async-iterator-first": "^1.0.0",
-    "bl": "^3.0.0",
+    "bl": "^4.0.0",
     "deep-extend": "~0.6.0",
     "err-code": "^2.0.0",
     "hamt-sharding": "~0.0.2",
-    "ipfs-unixfs": "~0.1.16",
+    "ipfs-unixfs": "^0.2.0",
     "ipld-dag-pb": "^0.18.0",
     "multicodec": "~0.5.1",
-    "multihashing-async": "~0.7.0",
+    "multihashing-async": "^0.8.0",
     "rabin-wasm": "~0.0.8",
-    "superstruct": "~0.6.1"
+    "superstruct": "^0.8.2"
   },
   "contributors": [
     "Alan Shaw <alan.shaw@protocol.ai>",
diff --git a/src/dag-builder/dir.js b/src/dag-builder/dir.js
index 7c4d3b3..7ba8111 100644
--- a/src/dag-builder/dir.js
+++ b/src/dag-builder/dir.js
@@ -8,6 +8,15 @@ const {
 
 const dirBuilder = async (item, ipld, options) => {
   const unixfs = new UnixFS('directory')
+
+  if (item.mtime) {
+    unixfs.mtime = item.mtime
+  }
+
+  if (item.mode) {
+    unixfs.mode = item.mode
+  }
+
   const node = new DAGNode(unixfs.marshal(), [])
   const cid = await persist(node, ipld, options)
   const path = item.path
diff --git a/src/dag-builder/file/index.js b/src/dag-builder/file/index.js
index a3fb565..f3be403 100644
--- a/src/dag-builder/file/index.js
+++ b/src/dag-builder/file/index.js
@@ -15,7 +15,7 @@ const dagBuilders = {
   trickle: require('./trickle')
 }
 
-async function * buildFile (source, ipld, options) {
+async function * buildFile (file, source, ipld, options) {
   let count = -1
   let previous
 
@@ -36,6 +36,15 @@ async function * buildFile (source, ipld, options) {
       opts.cidVersion = 1
     } else {
       unixfs = new UnixFS(options.leafType, buffer)
+
+      if (file.mtime) {
+        unixfs.mtime = file.mtime
+      }
+
+      if (file.mode) {
+        unixfs.mode = file.mode
+      }
+
       node = new DAGNode(unixfs.marshal())
     }
 
@@ -81,6 +90,14 @@ const reduce = (file, ipld, options) => {
     // create a parent node and add all the leaves
     const f = new UnixFS('file')
 
+    if (file.mtime) {
+      f.mtime = file.mtime
+    }
+
+    if (file.mode) {
+      f.mode = file.mode
+    }
+
     const links = leaves
       .filter(leaf => {
         if (leaf.cid.codec === 'raw' && leaf.node.length) {
@@ -132,7 +149,7 @@ const fileBuilder = async (file, source, ipld, options) => {
     throw errCode(new Error(`Unknown importer build strategy name: ${options.strategy}`), 'ERR_BAD_STRATEGY')
   }
 
-  const roots = await all(dagBuilder(buildFile(source, ipld, options), reduce(file, ipld, options), options.builderOptions))
+  const roots = await all(dagBuilder(buildFile(file, source, ipld, options), reduce(file, ipld, options), options.builderOptions))
 
   if (roots.length > 1) {
     throw errCode(new Error('expected a maximum of 1 roots and got ' + roots.length), 'ETOOMANYROOTS')
diff --git a/src/dir-flat.js b/src/dir-flat.js
index d2433be..cd8c10f 100644
--- a/src/dir-flat.js
+++ b/src/dir-flat.js
@@ -17,6 +17,7 @@ class DirFlat extends Dir {
   put (name, value) {
     this.cid = undefined
     this.size = undefined
+
     this._children[name] = value
   }
 
@@ -68,6 +69,15 @@ class DirFlat extends Dir {
     }
 
     const unixfs = new UnixFS('directory')
+
+    if (this.mtime) {
+      unixfs.mtime = this.mtime
+    }
+
+    if (this.mode) {
+      unixfs.mode = this.mode
+    }
+
     const node = new DAGNode(unixfs.marshal(), links)
     const cid = await persist(node, ipld, this.options)
 
diff --git a/src/dir-sharded.js b/src/dir-sharded.js
index f27074f..e515b8a 100644
--- a/src/dir-sharded.js
+++ b/src/dir-sharded.js
@@ -73,7 +73,7 @@ class DirSharded extends Dir {
   }
 
   async * flush (path, ipld) {
-    for await (const entry of flush(path, this._bucket, ipld, this.options)) {
+    for await (const entry of flush(path, this._bucket, ipld, this, this.options)) {
       yield entry
     }
   }
@@ -83,7 +83,7 @@ module.exports = DirSharded
 
 module.exports.hashFn = hashFn
 
-async function * flush (path, bucket, ipld, options) {
+async function * flush (path, bucket, ipld, shardRoot, options) {
   const children = bucket._children
   const links = []
 
@@ -99,7 +99,7 @@ async function * flush (path, bucket, ipld, options) {
     if (Bucket.isBucket(child)) {
       let shard
 
-      for await (const subShard of await flush('', child, ipld, options)) {
+      for await (const subShard of await flush('', child, ipld, null, options)) {
         shard = subShard
       }
 
@@ -141,6 +141,14 @@ async function * flush (path, bucket, ipld, options) {
   dir.fanout = bucket.tableSize()
   dir.hashType = options.hashFn.code
 
+  if (shardRoot && shardRoot.mtime) {
+    dir.mtime = shardRoot.mtime
+  }
+
+  if (shardRoot && shardRoot.mode) {
+    dir.mode = shardRoot.mode
+  }
+
   const node = new DAGNode(dir.marshal(), links)
   const cid = await persist(node, ipld, options)
 
diff --git a/src/flat-to-shard.js b/src/flat-to-shard.js
index aa7675e..1617b4d 100644
--- a/src/flat-to-shard.js
+++ b/src/flat-to-shard.js
@@ -20,9 +20,7 @@ module.exports = async function flatToShard (child, dir, threshold, options) {
       await parent.put(newDir.parentKey, newDir)
     }
 
-    if (parent) {
-      return flatToShard(newDir, parent, threshold, options)
-    }
+    return flatToShard(newDir, parent, threshold, options)
   }
 
   return newDir
@@ -36,7 +34,9 @@ async function convertToShard (oldDir, options) {
     parentKey: oldDir.parentKey,
     path: oldDir.path,
     dirty: oldDir.dirty,
-    flat: false
+    flat: false,
+    mtime: oldDir.mtime,
+    mode: oldDir.mode
   }, options)
 
   for await (const { key, child } of oldDir.eachChildSeries()) {
diff --git a/src/tree-builder.js b/src/tree-builder.js
index 8f48595..55bab49 100644
--- a/src/tree-builder.js
+++ b/src/tree-builder.js
@@ -36,7 +36,9 @@ async function addToTree (elem, tree, options) {
           parentKey: pathElem,
           path: currentPath,
           dirty: true,
-          flat: true
+          flat: true,
+          mtime: dir && dir.unixfs && dir.unixfs.mtime,
+          mode: dir && dir.unixfs && dir.unixfs.mode
         }, options)
       }
 
@@ -64,28 +66,26 @@ async function * treeBuilder (source, ipld, options) {
     yield entry
   }
 
-  if (tree) {
-    if (!options.wrapWithDirectory) {
-      if (tree.childCount() > 1) {
-        throw errCode(new Error('detected more than one root'), 'ERR_MORE_THAN_ONE_ROOT')
-      }
-
-      const unwrapped = await first(tree.eachChildSeries())
-
-      if (!unwrapped) {
-        return
-      }
-
-      tree = unwrapped.child
+  if (!options.wrapWithDirectory) {
+    if (tree.childCount() > 1) {
+      throw errCode(new Error('detected more than one root'), 'ERR_MORE_THAN_ONE_ROOT')
     }
 
-    if (!tree.dir) {
+    const unwrapped = await first(tree.eachChildSeries())
+
+    if (!unwrapped) {
       return
     }
 
-    for await (const entry of tree.flush(tree.path, ipld)) {
-      yield entry
-    }
+    tree = unwrapped.child
+  }
+
+  if (!tree.dir) {
+    return
+  }
+
+  for await (const entry of tree.flush(tree.path, ipld)) {
+    yield entry
   }
 }
 
diff --git a/test/benchmark.spec.js b/test/benchmark.spec.js
index 1c96013..b0b1db2 100644
--- a/test/benchmark.spec.js
+++ b/test/benchmark.spec.js
@@ -24,7 +24,7 @@ describe.skip('benchmark', function () {
   const times = []
 
   after(() => {
-    console.info(`Percent\tms`) // eslint-disable-line no-console
+    console.info('Percent\tms') // eslint-disable-line no-console
     times.forEach((time, index) => {
       console.info(`${index}\t${parseInt(time / REPEATS)}`) // eslint-disable-line no-console
     })
diff --git a/test/importer.spec.js b/test/importer.spec.js
index ed618df..6fab88e 100644
--- a/test/importer.spec.js
+++ b/test/importer.spec.js
@@ -676,5 +676,245 @@ strategies.forEach((strategy) => {
         }
       }
     })
+
+    it('supports passing mtime', async () => {
+      this.timeout(60 * 1000)
+
+      const options = {
+        rawLeaves: true
+      }
+      const now = parseInt(Date.now() / 1000)
+
+      for await (const file of importer([{
+        path: '1.2MiB.txt',
+        content: bigFile,
+        mtime: now
+      }], ipld, options)) {
+        const node = await exporter(file.cid, ipld)
+
+        expect(node.unixfs.mtime).to.equal(now)
+      }
+    })
+
+    it('supports passing mtime for directories', async () => {
+      this.timeout(60 * 1000)
+
+      const now = parseInt(Date.now() / 1000)
+
+      const entries = await all(importer([{
+        path: '/foo',
+        mtime: now
+      }], ipld))
+
+      const node = await exporter(entries[0].cid, ipld)
+      expect(node.unixfs.mtime).to.equal(now)
+    })
+
+    it('supports passing metadata for wrapping directories', async () => {
+      this.timeout(60 * 1000)
+
+      const now = parseInt(Date.now() / 1000)
+      const perms = parseInt('0777', 8)
+
+      const entries = await all(importer([{
+        path: '/foo',
+        mtime: now,
+        mode: perms
+      }, {
+        path: '/foo/bar.txt',
+        content: bigFile
+      }], ipld))
+
+      const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+      const node = nodes.filter(node => node.unixfs.type === 'directory').pop()
+
+      if (!node) {
+        expect.fail('no directory found')
+      }
+
+      expect(node.unixfs.mtime).to.equal(now)
+      expect(node.unixfs.mode).to.equal(perms)
+    })
+
+    it('supports passing metadata for intermediate directories', async () => {
+      this.timeout(60 * 1000)
+
+      const now = parseInt(Date.now() / 1000)
+      const perms = parseInt('0777', 8)
+
+      const entries = await all(importer([{
+        path: '/foo/bar',
+        mtime: now,
+        mode: perms
+      }, {
+        path: '/foo/bar/baz.txt',
+        content: bigFile
+      }], ipld))
+
+      const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+      const node = nodes.filter(node => node.unixfs.type === 'directory').pop()
+
+      if (!node) {
+        expect.fail('no directory found')
+      }
+
+      expect(node.unixfs.mtime).to.equal(now)
+      expect(node.unixfs.mode).to.equal(perms)
+    })
+
+    it('supports passing metadata for out of order intermediate directories', async () => {
+      this.timeout(60 * 1000)
+
+      const now = parseInt(Date.now() / 1000)
+      const perms = parseInt('0777', 8)
+
+      const entries = await all(importer([{
+        path: '/foo/bar/qux.txt',
+        content: bigFile
+      }, {
+        path: '/foo/bar',
+        mtime: now,
+        mode: perms
+      }, {
+        path: '/foo/quux'
+      }, {
+        path: '/foo/bar/baz.txt',
+        content: bigFile
+      }], ipld))
+
+      const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+      const node = nodes.filter(node => node.unixfs.type === 'directory' && node.name === 'bar').pop()
+
+      if (!node) {
+        expect.fail('no directory found')
+      }
+
+      expect(node.unixfs.mtime).to.equal(now)
+      expect(node.unixfs.mode).to.equal(perms)
+    })
+
+    it('supports passing mtime for hamt-sharded-directories', async () => {
+      this.timeout(60 * 1000)
+
+      const now = parseInt(Date.now() / 1000)
+
+      const entries = await all(importer([{
+        path: '/foo',
+        mtime: now
+      }, {
+        path: '/foo/bar.txt',
+        content: bigFile
+      }, {
+        path: '/foo/baz.txt',
+        content: bigFile
+      }, {
+        path: '/foo/qux'
+      }], ipld, {
+        shardSplitThreshold: 0
+      }))
+
+      const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+      const node = nodes.filter(node => node.unixfs.type === 'hamt-sharded-directory').pop()
+
+      if (!node) {
+        expect.fail('no hamt-sharded-directory found')
+      }
+
+      expect(node.unixfs.mtime).to.equal(now)
+    })
+
+    it('supports passing mode', async () => {
+      this.timeout(60 * 1000)
+
+      const options = {
+        rawLeaves: true
+      }
+      const mode = parseInt('0111', 8)
+
+      for await (const file of importer([{
+        path: '1.2MiB.txt',
+        content: bigFile,
+        mode
+      }], ipld, options)) {
+        const node = await exporter(file.cid, ipld)
+
+        expect(node.unixfs.mode).to.equal(mode)
+      }
+    })
+
+    it('supports passing mode for directories', async () => {
+      this.timeout(60 * 1000)
+
+      const mode = parseInt('0111', 8)
+
+      const entries = await all(importer([{
+        path: '/foo',
+        mode
+      }], ipld))
+
+      const node = await exporter(entries[0].cid, ipld)
+      expect(node.unixfs.mode).to.equal(mode)
+    })
+
+    it('supports passing different modes for different files', async () => {
+      this.timeout(60 * 1000)
+
+      const mode1 = parseInt('0111', 8)
+      const mode2 = parseInt('0222', 8)
+
+      const entries = await all(importer([{
+        path: '/foo/file1.txt',
+        content: bigFile,
+        mode: mode1
+      }, {
+        path: '/foo/file2.txt',
+        content: bigFile,
+        mode: mode2
+      }], ipld))
+
+      const node1 = await exporter(entries[0].cid, ipld)
+      expect(node1.unixfs.mode).to.equal(mode1)
+
+      const node2 = await exporter(entries[1].cid, ipld)
+      expect(node2.unixfs.mode).to.equal(mode2)
+    })
+
+    it('supports deeply nested files do not inherit custom metadata', async () => {
+      this.timeout(60 * 1000)
+
+      const mode = parseInt('0111', 8)
+
+      const entries = await all(importer([{
+        path: '/foo/file1.txt',
+        content: bigFile,
+        mode: mode
+      }, {
+        path: '/foo/bar/baz/file2.txt',
+        content: bigFile
+      }], ipld))
+
+      const node1 = await exporter(entries[0].cid, ipld)
+      expect(node1.unixfs.mode).to.equal(mode)
+
+      const node2 = await exporter(entries[1].cid, ipld)
+      expect(node2.unixfs.mode).to.not.equal(mode)
+    })
+
+    it('files and directories get default metadata if not specified', async () => {
+      this.timeout(60 * 1000)
+
+      const entries = await all(importer([{
+        path: '/foo/file1.txt',
+        content: bigFile
+      }], ipld))
+
+      const node1 = await exporter(entries[0].cid, ipld)
+      expect(node1.unixfs.mode).to.equal(parseInt('0644', 8))
+      expect(node1.unixfs.mtime).to.be.undefined()
+
+      const node2 = await exporter(entries[1].cid, ipld)
+      expect(node2.unixfs.mode).to.equal(parseInt('0755', 8))
+      expect(node2.unixfs.mtime).to.be.undefined()
+    })
   })
 })