From ef85b72e9318015ecac6b3a802427576518af9be Mon Sep 17 00:00:00 2001
From: Peter van der Zee
Date: Thu, 30 Apr 2020 14:28:03 +0200
Subject: [PATCH 1/2] fix(gatsby): more granular heuristics for finding large nodes for redux cache

---
 packages/gatsby/src/redux/index.ts   | 25 ++++++++++---------
 packages/gatsby/src/redux/persist.ts | 37 +++++++++++++++++-----------
 2 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/packages/gatsby/src/redux/index.ts b/packages/gatsby/src/redux/index.ts
index 92d52dfc27b10..47acae0ef25f1 100644
--- a/packages/gatsby/src/redux/index.ts
+++ b/packages/gatsby/src/redux/index.ts
@@ -68,17 +68,20 @@ export const store: Store = configureStore(readState())
 export const saveState = (): void => {
   const state = store.getState()
 
-  return writeToCache({
-    nodes: state.nodes,
-    status: state.status,
-    componentDataDependencies: state.componentDataDependencies,
-    components: state.components,
-    jobsV2: state.jobsV2,
-    staticQueryComponents: state.staticQueryComponents,
-    webpackCompilationHash: state.webpackCompilationHash,
-    pageDataStats: state.pageDataStats,
-    pageData: state.pageData,
-  })
+  return writeToCache(
+    {
+      nodes: state.nodes,
+      status: state.status,
+      componentDataDependencies: state.componentDataDependencies,
+      components: state.components,
+      jobsV2: state.jobsV2,
+      staticQueryComponents: state.staticQueryComponents,
+      webpackCompilationHash: state.webpackCompilationHash,
+      pageDataStats: state.pageDataStats,
+      pageData: state.pageData,
+    },
+    state.nodesByType
+  )
 }
 
 store.subscribe(() => {
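For context, the shape handed to writeToCache after this change can be sketched standalone
as below. The FakeNode interface and addNode helper are illustrative stand-ins, not Gatsby's
actual IGatsbyNode or store internals: state.nodes stays keyed by node id, while
state.nodesByType groups the same nodes per type, which is what the new heuristic samples from.

    // Illustrative stand-in for IGatsbyNode; only the fields this sketch needs.
    interface FakeNode {
      id: string
      internal: { type: string }
      data: string
    }

    const nodes = new Map<string, FakeNode>()
    const nodesByType = new Map<string, Map<string, FakeNode>>()

    // Hypothetical helper: register a node in both lookups, as the store does.
    function addNode(node: FakeNode): void {
      nodes.set(node.id, node)
      let byType = nodesByType.get(node.internal.type)
      if (!byType) {
        byType = new Map<string, FakeNode>()
        nodesByType.set(node.internal.type, byType)
      }
      byType.set(node.id, node)
    }

    addNode({ id: `a1`, internal: { type: `Article` }, data: `x`.repeat(10) })
    addNode({ id: `p1`, internal: { type: `Page` }, data: `x`.repeat(500000) })

    // Mirrors the new call shape: writeToCache(contents, state.nodesByType)
    console.log(nodes.size, nodesByType.get(`Page`)?.size) // 2 1
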
diff --git a/packages/gatsby/src/redux/persist.ts b/packages/gatsby/src/redux/persist.ts
index 400010484b12d..c4177ca6d7765 100644
--- a/packages/gatsby/src/redux/persist.ts
+++ b/packages/gatsby/src/redux/persist.ts
@@ -70,21 +70,24 @@ export function readFromCache(): ICachedReduxState {
   return obj
 }
 
-function guessSafeChunkSize(values: [string, IGatsbyNode][]): number {
+function guessSafeChunkSize(
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
+): number {
   // Pick a few random elements and measure their size then pick a chunk size
   // ceiling based on the worst case. Each test takes time so there's trade-off.
   // This attempts to prevent small sites with very large pages from OOMing.
   // This heuristic could still fail if it randomly grabs the smallest nodes.
-  // TODO: test a few nodes per each type instead of from all nodes
-
-  const nodesToTest = 11 // Very arbitrary number
-  const valueCount = values.length
-  const step = Math.max(1, Math.ceil(valueCount / nodesToTest))
-  let maxSize = 0
-  for (let i = 0; i < valueCount; i += step) {
-    const size = v8.serialize(values[i]).length
-    maxSize = Math.max(size, maxSize)
-  }
+
+  const nodesToTest = 11 // Very arbitrary number. Count is per type.
+  let maxSize = 1
+  nodesByType.forEach(nodes => {
+    const valueCount = nodes.size
+    const step = Math.max(1, Math.ceil(valueCount / nodesToTest))
+    for (let i = 0; i < valueCount; i += step) {
+      const size = v8.serialize(nodes[i]).length
+      maxSize = Math.max(size, maxSize)
+    }
+  })
 
   // Max size of a Buffer is 2gb (yeah, we're assuming 64bit system)
   // https://stackoverflow.com/questions/8974375/whats-the-maximum-size-of-a-node-js-buffer
@@ -94,7 +97,8 @@
 function prepareCacheFolder(
   targetDir: string,
-  contents: ICachedReduxState
+  contents: ICachedReduxState,
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
 ): void {
   // Temporarily save the nodes and remove them from the main redux store
   // This prevents an OOM when the page nodes collectively contain to much data
@@ -108,7 +112,7 @@
   if (map) {
     // Now store the nodes separately, chunk size determined by a heuristic
    const values: [string, IGatsbyNode][] = [...map.entries()]
-    const chunkSize = guessSafeChunkSize(values)
+    const chunkSize = guessSafeChunkSize(nodesByType)
     const chunks = Math.ceil(values.length / chunkSize)
 
     for (let i = 0; i < chunks; ++i) {
@@ -136,13 +140,16 @@ function safelyRenameToBak(reduxCacheFolder: string): string {
   return bakName
 }
 
-export function writeToCache(contents: ICachedReduxState): void {
+export function writeToCache(
+  contents: ICachedReduxState,
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
+): void {
   // Note: this should be a transactional operation. So work in a tmp dir and
   // make sure the cache cannot be left in a corruptable state due to errors.
 
   const tmpDir = mkdtempSync(path.join(os.tmpdir(), `reduxcache`)) // linux / windows
 
-  prepareCacheFolder(tmpDir, contents)
+  prepareCacheFolder(tmpDir, contents, nodesByType)
 
   // Replace old cache folder with new. If the first rename fails, the cache
   // is just stale. If the second rename fails, the cache is empty. In either
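The heuristic introduced above can be read in isolation as the following sketch. It is a
simplified rendering, not the patched function itself: the types are loosened to unknown, and
it already iterates each type's node values as an array (in this commit the per-type Map is
still indexed like an array, which the follow-up patch corrects). The numbers in the comments
are approximate.

    import * as v8 from "v8"

    function sketchGuessSafeChunkSize(
      nodesByType: Map<string, Map<string, unknown>>
    ): number {
      const nodesToTest = 11 // sample at most ~11 nodes per type
      let maxSize = 1
      nodesByType.forEach(nodesMap => {
        const nodes = [...nodesMap.values()]
        const step = Math.max(1, Math.ceil(nodes.length / nodesToTest))
        for (let i = 0; i < nodes.length; i += step) {
          maxSize = Math.max(v8.serialize(nodes[i]).length, maxSize)
        }
      })
      // A Buffer caps out at 2gb, so size each chunk for a worst-case fit.
      return Math.floor((2 * 1024 * 1024 * 1024 - 1) / maxSize)
    }

    // Worked example: if the largest sampled node serializes to roughly 1mb,
    // a chunk may safely hold on the order of 2047 nodes.
    const oneMbNode = { data: `x`.repeat(1024 * 1024) }
    const byType = new Map([[`Page`, new Map([[`p1`, oneMbNode]])]])
    console.log(sketchGuessSafeChunkSize(byType))
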
From 3efe4e9783c3f9693df2e846a0d44823e3def24a Mon Sep 17 00:00:00 2001
From: Peter van der Zee
Date: Mon, 11 May 2020 15:51:35 +0200
Subject: [PATCH 2/2] Add test, fix bug, delete branch

---
 packages/gatsby/src/redux/__tests__/index.js | 36 ++++++++++++++++++++
 packages/gatsby/src/redux/persist.ts         |  9 ++---
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/packages/gatsby/src/redux/__tests__/index.js b/packages/gatsby/src/redux/__tests__/index.js
index b8327549acf1b..c252677b38816 100644
--- a/packages/gatsby/src/redux/__tests__/index.js
+++ b/packages/gatsby/src/redux/__tests__/index.js
@@ -3,6 +3,7 @@ const path = require(`path`)
 
 const writeToCache = jest.spyOn(require(`../persist`), `writeToCache`)
 const { saveState, store, readState } = require(`../index`)
+import { guessSafeChunkSize } from "../persist"
 
 const {
   actions: { createPage },
@@ -87,6 +88,21 @@ function getFakeNodes() {
   return map
 }
 
+function getLargeNodes(n, type) {
+  let map /*: Map<string, IGatsbyNode> */ = new Map()
+  for (let i = 0; i < n; ++i) {
+    const id = `page_${type}_${i}`
+    map.set(id, {
+      id,
+      data: `x`.repeat(500000),
+      internal: {
+        type,
+      },
+    })
+  }
+  return map
+}
+
 describe(`redux db`, () => {
   const initialComponentsState = _.cloneDeep(store.getState().components)
 
@@ -149,4 +165,24 @@
     expect(mockWrittenContent.has(legacyLocation)).toBe(false)
   })
+
+  it(`should warn for large nodes`, async () => {
+    const nodes = new Map([
+      [`Ding`, getLargeNodes(30, `Ding`)],
+      [`Dong`, getLargeNodes(30, `Dong`)],
+      [`Dang`, getLargeNodes(30, `Dang`)],
+    ])
+
+    const maxBuf = 1.5 * 1024 * 1024 * 1024 // it's 2gb, actually. We want a margin.
+
+    // Math.floor(() / maxSize)
+    const node = [...[...nodes.values()][0].values()][0]
+    const nodeSize = require(`v8`).serialize(node).length
+    const expectedChunkSize = maxBuf / nodeSize
+
+    const actualChunkSize = guessSafeChunkSize(nodes)
+
+    // Allow some rounding errors for margin
+    expect(Math.abs(actualChunkSize - expectedChunkSize) <= 1).toBe(true)
+  })
 })
diff --git a/packages/gatsby/src/redux/persist.ts b/packages/gatsby/src/redux/persist.ts
index c4177ca6d7765..2eccf645a1047 100644
--- a/packages/gatsby/src/redux/persist.ts
+++ b/packages/gatsby/src/redux/persist.ts
@@ -70,7 +70,7 @@ export function readFromCache(): ICachedReduxState {
   return obj
 }
 
-function guessSafeChunkSize(
+export function guessSafeChunkSize(
   nodesByType: Map<string, Map<string, IGatsbyNode>>
 ): number {
   // Pick a few random elements and measure their size then pick a chunk size
@@ -80,8 +80,9 @@
   const nodesToTest = 11 // Very arbitrary number. Count is per type.
   let maxSize = 1
-  nodesByType.forEach(nodes => {
-    const valueCount = nodes.size
+  nodesByType.forEach((nodesMap: Map<string, IGatsbyNode>) => {
+    const nodes = [...nodesMap.values()]
+    const valueCount = nodes.length
     const step = Math.max(1, Math.ceil(valueCount / nodesToTest))
     for (let i = 0; i < valueCount; i += step) {
       const size = v8.serialize(nodes[i]).length
       maxSize = Math.max(size, maxSize)
@@ -145,7 +146,7 @@
 export function writeToCache(
   contents: ICachedReduxState,
   nodesByType: Map<string, Map<string, IGatsbyNode>>
 ): void {
   // Note: this should be a transactional operation. So work in a tmp dir and
-  // make sure the cache cannot be left in a corruptable state due to errors.
+  // make sure the cache cannot be left in a corruptible state due to errors.
 
   const tmpDir = mkdtempSync(path.join(os.tmpdir(), `reduxcache`)) // linux / windows
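To round out the picture, the chunked write that consumes this chunk size can be sketched as
below. The slicing loop mirrors the lines visible in prepareCacheFolder's hunk, but the
writeChunks wrapper, the chunk_${i} file name, and the final write call are assumptions for
illustration, not code taken from the patch.

    import * as os from "os"
    import * as path from "path"
    import * as v8 from "v8"
    import { mkdtempSync, writeFileSync } from "fs"

    // Hypothetical: write a node map to disk in chunks of at most chunkSize entries.
    function writeChunks(
      nodes: Map<string, unknown>,
      chunkSize: number,
      targetDir: string
    ): void {
      const values = [...nodes.entries()]
      const chunks = Math.ceil(values.length / chunkSize)
      for (let i = 0; i < chunks; ++i) {
        const chunk = values.slice(i * chunkSize, (i + 1) * chunkSize)
        // With a chunkSize from the heuristic above, each serialized chunk should
        // stay under the 2gb Buffer limit.
        writeFileSync(path.join(targetDir, `chunk_${i}`), v8.serialize(chunk))
      }
    }

    const dir = mkdtempSync(path.join(os.tmpdir(), `reduxcache-demo`))
    writeChunks(new Map([[`a`, { id: `a` }], [`b`, { id: `b` }]]), 1, dir)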