Skip to content

Commit e72802b

Browse files
committed
fs: improve promise based readFile performance for big files
This significantly reduces the peak memory for the promise based readFile operation by reusing a single memory chunk after each read and stringifying that chunk immediately. Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
1 parent 5e57d24 commit e72802b

File tree

3 files changed

+66
-38
lines changed

3 files changed

+66
-38
lines changed

benchmark/fs/readfile-promises.js

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,19 @@ const filename = path.resolve(tmpdir.path,
1515

1616
const bench = common.createBenchmark(main, {
1717
duration: [5],
18-
len: [1024, 16 * 1024 * 1024],
18+
len: [
19+
1024,
20+
512 * 1024,
21+
4 * 1024 ** 2,
22+
8 * 1024 ** 2,
23+
16 * 1024 ** 2,
24+
32 * 1024 ** 2,
25+
],
26+
encoding: ['', 'utf8'],
1927
concurrent: [1, 10]
2028
});
2129

22-
function main({ len, duration, concurrent }) {
30+
function main({ len, duration, encoding, concurrent }) {
2331
try {
2432
fs.unlinkSync(filename);
2533
} catch {
@@ -44,7 +52,7 @@ function main({ len, duration, concurrent }) {
4452
}, duration * 1000);
4553

4654
function read() {
47-
fs.promises.readFile(filename)
55+
fs.promises.readFile(filename, { encoding })
4856
.then((res) => afterRead(undefined, res))
4957
.catch((err) => afterRead(err));
5058
}

lib/fs.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,9 @@ function readFileAfterStat(err, stats) {
343343
if (err)
344344
return context.close(err);
345345

346+
// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
347+
// wise, similar to the promise based version (less peak memory and chunked
348+
// stringify operations vs multiple C++/JS boundary crossings).
346349
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
347350

348351
if (size > kIoMaxLength) {
@@ -352,6 +355,9 @@ function readFileAfterStat(err, stats) {
352355

353356
try {
354357
if (size === 0) {
358+
// TODO(BridgeAR): We are able to optimize this in case an encoding is used. If
359+
// that's the case, let's use the StringDecoder and directly concat the
360+
// result and to reuse the former chunk instead of allocating a new one.
355361
context.buffers = [];
356362
} else {
357363
context.buffer = Buffer.allocUnsafeSlow(size);

lib/internal/fs/promises.js

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ const {
8686
promisify,
8787
} = require('internal/util');
8888
const { EventEmitterMixin } = require('internal/event_target');
89+
const { StringDecoder } = require('string_decoder');
8990
const { watch } = require('internal/fs/watchers');
9091
const { isIterable } = require('internal/streams/utils');
9192
const assert = require('internal/assert');
@@ -416,63 +417,76 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
416417

417418
async function readFileHandle(filehandle, options) {
418419
const signal = options?.signal;
420+
const encoding = options?.encoding;
421+
const decoder = encoding && new StringDecoder(encoding);
419422

420423
checkAborted(signal);
421424

422425
const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);
423426

424427
checkAborted(signal);
425428

426-
let size;
429+
let size = 0;
430+
let length = 0;
427431
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
428432
size = statFields[8/* size */];
429-
} else {
430-
size = 0;
433+
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
434+
}
435+
if (length === 0) {
436+
length = kReadFileUnknownBufferLength;
431437
}
432438

433439
if (size > kIoMaxLength)
434440
throw new ERR_FS_FILE_TOO_LARGE(size);
435441

436-
let endOfFile = false;
437442
let totalRead = 0;
438-
const noSize = size === 0;
439-
const buffers = [];
440-
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
441-
do {
443+
let buffer = Buffer.allocUnsafeSlow(length);
444+
let result = '';
445+
let offset = 0;
446+
let buffers;
447+
448+
while (true) {
442449
checkAborted(signal);
443-
let buffer;
444-
let offset;
445-
let length;
446-
if (noSize) {
447-
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
448-
offset = 0;
449-
length = kReadFileUnknownBufferLength;
450-
} else {
451-
buffer = fullBuffer;
452-
offset = totalRead;
453-
length = MathMin(size - totalRead, kReadFileBufferLength);
454-
}
455450

456451
const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
457-
length, -1, kUsePromises)) || 0;
452+
length, -1, kUsePromises)) ?? 0;
458453
totalRead += bytesRead;
459-
endOfFile = bytesRead === 0 || totalRead === size;
460-
if (noSize && bytesRead > 0) {
461-
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
462-
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
463-
ArrayPrototypePush(buffers, chunkBuffer);
454+
455+
if (bytesRead === 0 || totalRead === size || bytesRead !== buffer.length) {
456+
const singleRead = bytesRead === totalRead;
457+
458+
if (bytesRead !== buffer.length) {
459+
buffer = buffer.slice(0, bytesRead);
460+
}
461+
462+
if (!encoding) {
463+
if (size === 0 && !singleRead) {
464+
ArrayPrototypePush(buffers, buffer);
465+
return Buffer.concat(buffers, totalRead);
466+
}
467+
return buffer;
468+
}
469+
470+
if (singleRead) {
471+
return buffer.toString(encoding);
472+
}
473+
result += decoder.end(buffer);
474+
return result;
464475
}
465-
} while (!endOfFile);
466476

467-
let result;
468-
if (size > 0) {
469-
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
470-
} else {
471-
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
472-
totalRead);
477+
if (encoding) {
478+
result += decoder.write(buffer);
479+
} else if (size !== 0) {
480+
// TODO(BridgeAR): This condition needs a test. A file should be written
481+
// that is chunked without encoding.
482+
offset += bytesRead;
483+
} else {
484+
buffers ??= [];
485+
// Unknown file size requires chunks.
486+
ArrayPrototypePush(buffers, buffer);
487+
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
488+
}
473489
}
474-
475-
return options.encoding ? result.toString(options.encoding) : result;
476490
}
477491

478492
// All of the functions are defined as async in order to ensure that errors

0 commit comments

Comments
 (0)