From 67a4ce1c6e230508ba307502e0937a63a7e07482 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Wed, 15 Nov 2017 12:28:04 -0500
Subject: [PATCH] fs: partition readFile against pool exhaustion

Problem: Node implements fs.readFile as:
- a call to stat, then
- a C++ -> libuv request to read the entire file using the stat size

Why is this bad?
The effect is to place on the libuv threadpool a potentially-large
read request, occupying the libuv thread until it completes.
While readFile certainly requires buffering the entire file contents,
it can partition the read into smaller buffers (as is done on other
read paths) along the way to avoid threadpool exhaustion.

If the file is relatively large or stored on a slow medium, reading
the entire file in one shot seems particularly harmful, and presents
a possible DoS vector.

Solution: Partition the read into multiple smaller requests.

Considerations:

1. Correctness

I don't think partitioning the read like this raises any additional
risk of read-write races on the FS.
If the application is concurrently readFile'ing and modifying the
file, it will already see funny behavior. Though libuv uses preadv
where available, this doesn't guarantee read atomicity in the
presence of concurrent writes.

2. Performance

Downside: Partitioning means that a single large readFile will
require many "out and back" requests to libuv, introducing overhead.
Upside: In between each "out and back", other work pending on the
threadpool can take a turn.

In short, although partitioning will slow down a large request,
it will lead to better throughput if the threadpool is handling
more than one type of request.

Fixes: https://github.com/nodejs/node/issues/17047
PR-URL: https://github.com/nodejs/node/pull/17054
Reviewed-By: Benjamin Gruenbaum
Reviewed-By: Tiancheng "Timothy" Gu
Reviewed-By: Gireesh Punathil
Reviewed-By: James M Snell
Reviewed-By: Matteo Collina
Reviewed-By: Sakthipriyan Vairamani
Reviewed-By: Ruben Bridgewater
---
 benchmark/fs/readfile-partitioned.js | 86 ++++++++++++++++++++++++++++
 benchmark/fs/readfile.js             | 15 +++--
 doc/api/fs.md                        |  7 +--
 lib/fs.js                            |  2 +-
 test/parallel/test-fs-readfile.js    | 58 +++++++++++++++++++
 5 files changed, 159 insertions(+), 9 deletions(-)
 create mode 100644 benchmark/fs/readfile-partitioned.js
 create mode 100644 test/parallel/test-fs-readfile.js

diff --git a/benchmark/fs/readfile-partitioned.js b/benchmark/fs/readfile-partitioned.js
new file mode 100644
index 00000000000000..be3b7fd057bbe0
--- /dev/null
+++ b/benchmark/fs/readfile-partitioned.js
@@ -0,0 +1,86 @@
+// Submit a mix of short and long jobs to the threadpool.
+// Report total job throughput.
+// If we partition the long job, overall job throughput goes up significantly.
+// However, this comes at the cost of the long job throughput.
+//
+// Short jobs: small zip jobs.
+// Long jobs: fs.readFile on a large file.
+
+'use strict';
+
+const path = require('path');
+const common = require('../common.js');
+const filename = path.resolve(__dirname,
+                              `.removeme-benchmark-garbage-${process.pid}`);
+const fs = require('fs');
+const zlib = require('zlib');
+const assert = require('assert');
+
+const bench = common.createBenchmark(main, {
+  dur: [5],
+  len: [1024, 16 * 1024 * 1024],
+  concurrent: [1, 10]
+});
+
+function main(conf) {
+  const len = +conf.len;
+  try { fs.unlinkSync(filename); } catch (e) {}
+  var data = Buffer.alloc(len, 'x');
+  fs.writeFileSync(filename, data);
+  data = null;
+
+  var zipData = Buffer.alloc(1024, 'a');
+
+  var reads = 0;
+  var zips = 0;
+  var benchEnded = false;
+  bench.start();
+  setTimeout(function() {
+    const totalOps = reads + zips;
+    benchEnded = true;
+    bench.end(totalOps);
+    try { fs.unlinkSync(filename); } catch (e) {}
+  }, +conf.dur * 1000);
+
+  function read() {
+    fs.readFile(filename, afterRead);
+  }
+
+  function afterRead(er, data) {
+    if (er) {
+      if (er.code === 'ENOENT') {
+        // Only OK if unlinked by the timer from main.
+        assert.ok(benchEnded);
+        return;
+      }
+      throw er;
+    }
+
+    if (data.length !== len)
+      throw new Error('wrong number of bytes returned');
+
+    reads++;
+    if (!benchEnded)
+      read();
+  }
+
+  function zip() {
+    zlib.deflate(zipData, afterZip);
+  }
+
+  function afterZip(er, data) {
+    if (er)
+      throw er;
+
+    zips++;
+    if (!benchEnded)
+      zip();
+  }
+
+  // Start reads
+  var cur = +conf.concurrent;
+  while (cur--) read();
+
+  // Start a competing zip
+  zip();
+}
diff --git a/benchmark/fs/readfile.js b/benchmark/fs/readfile.js
index 7da7758ed06638..282b4ac7621340 100644
--- a/benchmark/fs/readfile.js
+++ b/benchmark/fs/readfile.js
@@ -8,6 +8,7 @@ const common = require('../common.js');
 const filename = path.resolve(process.env.NODE_TMPDIR || __dirname,
                               `.removeme-benchmark-garbage-${process.pid}`);
 const fs = require('fs');
+const assert = require('assert');
 
 const bench = common.createBenchmark(main, {
   dur: [5],
@@ -22,10 +23,10 @@ function main({ len, dur, concurrent }) {
   data = null;
 
   var reads = 0;
-  var bench_ended = false;
+  var benchEnded = false;
   bench.start();
   setTimeout(function() {
-    bench_ended = true;
+    benchEnded = true;
     bench.end(reads);
     try { fs.unlinkSync(filename); } catch (e) {}
     process.exit(0);
@@ -36,14 +37,20 @@ function main({ len, dur, concurrent }) {
   }
 
   function afterRead(er, data) {
-    if (er)
+    if (er) {
+      if (er.code === 'ENOENT') {
+        // Only OK if unlinked by the timer from main.
+        assert.ok(benchEnded);
+        return;
+      }
       throw er;
+    }
 
     if (data.length !== len)
      throw new Error('wrong number of bytes returned');
 
     reads++;
-    if (!bench_ended)
+    if (!benchEnded)
       read();
   }
 
diff --git a/doc/api/fs.md b/doc/api/fs.md
index d5e7f75ebe8bc1..00c6d946a0b355 100644
--- a/doc/api/fs.md
+++ b/doc/api/fs.md
@@ -2250,10 +2250,9 @@ Any specified file descriptor has to support reading.
 *Note*: If a file descriptor is specified as the `path`, it will not be closed
 automatically.
 
-*Note*: `fs.readFile()` reads the entire file in a single threadpool request.
-To minimize threadpool task length variation, prefer the partitioned APIs
-`fs.read()` and `fs.createReadStream()` when reading files as part of
-fulfilling a client request.
+*Note*: `fs.readFile()` buffers the entire file.
+To minimize memory costs, when possible prefer streaming via
+`fs.createReadStream()`.
 
 ## fs.readFileSync(path[, options])
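
The lib/fs.js hunk itself is not included in this excerpt (the diffstat shows it as a one-line change). For readers who want to see what "partitioning the read" means in practice, below is a minimal userland sketch of the idea, not the actual lib/fs.js implementation: rather than issuing a single read sized to the whole file, the file is consumed in fixed-size chunks so that each libuv threadpool request stays short. The 64 KiB chunk size and the helper name `readFileChunked` are illustrative assumptions for this sketch, not values taken from the patch.

```js
'use strict';

// Illustrative sketch only: read a file in fixed-size chunks so that no
// single threadpool request covers the entire file. The 64 KiB chunk size
// and helper name are assumptions, not the values used by lib/fs.js.
const fs = require('fs');

const kChunkSize = 64 * 1024;  // hypothetical partition size

function readFileChunked(path, callback) {
  fs.open(path, 'r', (err, fd) => {
    if (err) return callback(err);

    fs.fstat(fd, (err, st) => {
      if (err) return fs.close(fd, () => callback(err));

      const size = st.size;
      const buffer = Buffer.allocUnsafe(size);
      let offset = 0;

      function readChunk() {
        if (offset >= size) {
          // Done: hand the fully buffered contents to the caller.
          return fs.close(fd, (err) => callback(err, buffer));
        }
        const length = Math.min(kChunkSize, size - offset);
        // Each fs.read() here is one short-lived threadpool request,
        // so other pending work can interleave between chunks.
        fs.read(fd, buffer, offset, length, offset, (err, bytesRead) => {
          if (err) return fs.close(fd, () => callback(err));
          if (bytesRead === 0) {
            // Unexpected EOF: return what was read so far.
            return fs.close(fd, (err2) => callback(err2, buffer.slice(0, offset)));
          }
          offset += bytesRead;
          readChunk();
        });
      }

      readChunk();
    });
  });
}

// Usage: behaves like fs.readFile(path, cb), but partitions the work.
readFileChunked(__filename, (err, data) => {
  if (err) throw err;
  console.log(`read ${data.length} bytes in <= ${kChunkSize}-byte chunks`);
});
```

The readfile-partitioned.js benchmark above measures exactly the trade-off described in the commit message: per-request latency for the large read goes up, while overall throughput across mixed threadpool work improves.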