fix(gatsby): granular redux cache heuristics for finding large nodes #23643

Closed · wants to merge 2 commits
25 changes: 14 additions & 11 deletions packages/gatsby/src/redux/index.ts
@@ -68,17 +68,20 @@ export const store: Store<IGatsbyState> = configureStore(readState())
 export const saveState = (): void => {
   const state = store.getState()

-  return writeToCache({
-    nodes: state.nodes,
-    status: state.status,
-    componentDataDependencies: state.componentDataDependencies,
-    components: state.components,
-    jobsV2: state.jobsV2,
-    staticQueryComponents: state.staticQueryComponents,
-    webpackCompilationHash: state.webpackCompilationHash,
-    pageDataStats: state.pageDataStats,
-    pageData: state.pageData,
-  })
+  return writeToCache(
+    {
+      nodes: state.nodes,
+      status: state.status,
+      componentDataDependencies: state.componentDataDependencies,
+      components: state.components,
+      jobsV2: state.jobsV2,
+      staticQueryComponents: state.staticQueryComponents,
+      webpackCompilationHash: state.webpackCompilationHash,
+      pageDataStats: state.pageDataStats,
+      pageData: state.pageData,
+    },
+    state.nodesByType
+  )
 }

 store.subscribe(() => {
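For context on the new second argument: `state.nodesByType` groups every node under its type name, which is what lets the heuristic in persist.ts sample each node type separately. A minimal sketch of that shape, with node fields beyond `id` and `internal.type` omitted and all values illustrative:

// Sketch only: the nested-Map shape implied by the new signatures in this diff.
// Outer key is the node type name, inner key is the node id.
const nodesByType: Map<string, Map<string, IGatsbyNode>> = new Map([
  [
    `SitePage`,
    new Map([
      [`page-1`, { id: `page-1`, internal: { type: `SitePage` } } as IGatsbyNode],
    ]),
  ],
])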
37 changes: 22 additions & 15 deletions packages/gatsby/src/redux/persist.ts
@@ -70,21 +70,24 @@ export function readFromCache(): ICachedReduxState {
   return obj
 }

-function guessSafeChunkSize(values: [string, IGatsbyNode][]): number {
+function guessSafeChunkSize(
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
+): number {
   // Pick a few random elements and measure their size then pick a chunk size
   // ceiling based on the worst case. Each test takes time so there's a trade-off.
   // This attempts to prevent small sites with very large pages from OOMing.
   // This heuristic could still fail if it randomly grabs the smallest nodes.
-  // TODO: test a few nodes per each type instead of from all nodes

-  const nodesToTest = 11 // Very arbitrary number
-  const valueCount = values.length
-  const step = Math.max(1, Math.ceil(valueCount / nodesToTest))
-  let maxSize = 0
-  for (let i = 0; i < valueCount; i += step) {
-    const size = v8.serialize(values[i]).length
-    maxSize = Math.max(size, maxSize)
-  }
+  const nodesToTest = 11 // Very arbitrary number. Count is per type.
+  let maxSize = 1
+  nodesByType.forEach(nodes => {
+    const values = [...nodes.values()]
+    const step = Math.max(1, Math.ceil(values.length / nodesToTest))
+    for (let i = 0; i < values.length; i += step) {
+      const size = v8.serialize(values[i]).length
+      maxSize = Math.max(size, maxSize)
+    }
+  })

   // Max size of a Buffer is 2gb (yeah, we're assuming a 64bit system)
   // https://stackoverflow.com/questions/8974375/whats-the-maximum-size-of-a-node-js-buffer
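The return statement that turns `maxSize` into a chunk size is collapsed above, but the arithmetic it implies can be made concrete. A worked example assuming a 1.5 GB ceiling; the actual ceiling constant lives in the collapsed lines and may differ:

// Illustration only, not the collapsed source: if the worst sampled node
// serializes to ~2 MB, one chunk can safely hold about 768 node entries
// before the serialized Buffer approaches the ~2 GB cap mentioned above.
const ASSUMED_CEILING = 1.5 * 1024 * 1024 * 1024 // assumed margin under 2 GB
const sampledMaxSize = 2 * 1024 * 1024 // worst node seen by the sampler
const chunkSize = Math.floor(ASSUMED_CEILING / sampledMaxSize) // => 768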
@@ -94,7 +97,8 @@ function guessSafeChunkSize(values: [string, IGatsbyNode][]): number {

 function prepareCacheFolder(
   targetDir: string,
-  contents: ICachedReduxState
+  contents: ICachedReduxState,
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
 ): void {
   // Temporarily save the nodes and remove them from the main redux store
   // This prevents an OOM when the page nodes collectively contain too much data
@@ -108,7 +112,7 @@
   if (map) {
     // Now store the nodes separately, chunk size determined by a heuristic
     const values: [string, IGatsbyNode][] = [...map.entries()]
-    const chunkSize = guessSafeChunkSize(values)
+    const chunkSize = guessSafeChunkSize(nodesByType)
     const chunks = Math.ceil(values.length / chunkSize)

     for (let i = 0; i < chunks; ++i) {
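The loop body is collapsed above; what it does is slice `values` into `chunkSize`-sized pieces and write each piece with `v8.serialize`. A self-contained sketch of that step, reusing the `IGatsbyNode` type from the diff — the file name pattern and use of plain `fs` are assumptions, not necessarily what persist.ts does:

import * as v8 from "v8"
import * as path from "path"
import { writeFileSync } from "fs"

// Sketch: persist node entries in chunkSize-sized, v8-serialized slices,
// one file per slice, so no single Buffer exceeds the safe ceiling.
function writeNodeChunks(
  targetDir: string,
  values: [string, IGatsbyNode][],
  chunkSize: number
): void {
  const chunks = Math.ceil(values.length / chunkSize)
  for (let i = 0; i < chunks; ++i) {
    const slice = values.slice(i * chunkSize, (i + 1) * chunkSize)
    writeFileSync(
      path.join(targetDir, `redux.node.state_${i}`), // illustrative file name
      v8.serialize(slice)
    )
  }
}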
@@ -136,13 +140,16 @@ function safelyRenameToBak(reduxCacheFolder: string): string {
   return bakName
 }

-export function writeToCache(contents: ICachedReduxState): void {
+export function writeToCache(
+  contents: ICachedReduxState,
+  nodesByType: Map<string, Map<string, IGatsbyNode>>
+): void {
   // Note: this should be a transactional operation. So work in a tmp dir and
   // make sure the cache cannot be left in a corrupted state due to errors.

   const tmpDir = mkdtempSync(path.join(os.tmpdir(), `reduxcache`)) // linux / windows

-  prepareCacheFolder(tmpDir, contents)
+  prepareCacheFolder(tmpDir, contents, nodesByType)

   // Replace old cache folder with new. If the first rename fails, the cache
   // is just stale. If the second rename fails, the cache is empty. In either
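The two renames described in the comment above can be sketched as follows, assuming `reduxCacheFolder` points at the live on-disk cache and using `safelyRenameToBak`, the helper shown earlier in this file:

import { renameSync } from "fs"

// Step 1: move the live cache aside. If this throws, the old cache is
// merely stale and still usable.
const bakDir = safelyRenameToBak(reduxCacheFolder)
// Step 2: promote the freshly written tmp dir. If this throws, the cache
// is empty but never half-written.
renameSync(tmpDir, reduxCacheFolder)
// bakDir can then be cleaned up on a best-effort basis.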