From 1ae3137fccbfd63c03575e81e8e0f7eb575e6f2b Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Thu, 6 Jan 2022 09:08:59 -0800 Subject: [PATCH 1/2] add test for layers model fit mem leak --- tfjs-layers/src/engine/training.ts | 2 ++ tfjs-layers/src/engine/training_tensors.ts | 6 +++++ tfjs-layers/src/engine/training_test.ts | 27 ++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/tfjs-layers/src/engine/training.ts b/tfjs-layers/src/engine/training.ts index d3da8b5627c..83c6f08ec1f 100644 --- a/tfjs-layers/src/engine/training.ts +++ b/tfjs-layers/src/engine/training.ts @@ -1535,6 +1535,8 @@ export class LayersModel extends Container implements tfc.InferenceModel { lossValues.push(v[0]); } tfc.dispose(losses); + disposeNewTensors(standardizeOut[0], x); + disposeNewTensors(standardizeOut[1], y); return singletonOrArray(lossValues); } diff --git a/tfjs-layers/src/engine/training_tensors.ts b/tfjs-layers/src/engine/training_tensors.ts index 5473631775d..7d721b9b6ee 100644 --- a/tfjs-layers/src/engine/training_tensors.ts +++ b/tfjs-layers/src/engine/training_tensors.ts @@ -423,6 +423,8 @@ export async function fitTensors( model.isTraining = true; let inputs: Tensor[]; let targets: Tensor[]; + // let originalInputs: Tensor[]; + // let originalTargets: Tensor[]; let inputValX: Tensor|Tensor[]; let inputValY: Tensor|Tensor[]; let valX: Tensor|Tensor[]; @@ -481,8 +483,10 @@ export async function fitTensors( Math.floor(inputs[0].shape[0] * (1 - args.validationSplit)); const originalBatchSize = inputs[0].shape[0]; valX = sliceArrays(inputs, splitAt, originalBatchSize) as Tensor[]; + // originalInputs = inputs; inputs = sliceArrays(inputs, 0, splitAt) as Tensor[]; valY = sliceArrays(targets, splitAt, originalBatchSize) as Tensor[]; + // originalTargets = targets; targets = sliceArrays(targets, 0, splitAt) as Tensor[]; // TODO(cais): Once sampleWeights becomes available, slice it to get // valSampleWeights. @@ -537,6 +541,8 @@ export async function fitTensors( // Memory clean up. disposeNewTensors(inputs, x); disposeNewTensors(targets, y); + // disposeNewTensors(originalInputs, x); + // disposeNewTensors(originalTargets, y); disposeNewTensors(valX as Tensor[], inputValX); disposeNewTensors(valY as Tensor[], inputValY); if (sampleWeights != null) { diff --git a/tfjs-layers/src/engine/training_test.ts b/tfjs-layers/src/engine/training_test.ts index cbfe382c839..aaa500633e8 100644 --- a/tfjs-layers/src/engine/training_test.ts +++ b/tfjs-layers/src/engine/training_test.ts @@ -1999,6 +1999,33 @@ describeMathCPUAndWebGL2('LayersModel.fit: No memory leak', () => { done(); }); + it('Repeated fit calls of 1d target leads to no memory leak: validationSplit', + async done => { + createDenseModelAndData(); + + const validationSplit = 0.4; + targets = ones([numSamples]); + model.compile({optimizer: 'SGD', loss: 'meanSquaredError'}); + // Use batchSize === numSamples to get exactly one batch. + await model.fit( + inputs, targets, + {batchSize: numSamples, epochs: 1, validationSplit}); + const numTensors0 = memory().numTensors; + for (let i = 0; i < 2; ++i) { + await model.fit( + inputs, targets, + {batchSize: numSamples, epochs: 1, validationSplit}); + const numTensorsNow = memory().numTensors; + if (numTensorsNow > numTensors0) { + done.fail( + `Memory leak detected during fit(): Leaked ` + + `${numTensorsNow - numTensors0} tensor(s) after the ` + + `${i + 1}-th fit() call.`); + } + } + done(); + }); + it('Repeated fit calls leads to no memory leak: validationData', async done => { createDenseModelAndData(); From dedef2a5dea4376588abaf272884bcb803c29ebd Mon Sep 17 00:00:00 2001 From: Ping Yu <4018+pyu10055@users.noreply.github.com> Date: Thu, 6 Jan 2022 09:52:27 -0800 Subject: [PATCH 2/2] fix mem leak --- tfjs-layers/src/engine/training_tensors.ts | 12 ++++++------ tfjs-layers/src/engine/training_test.ts | 6 +++--- tfjs-layers/src/utils/test_utils.ts | 8 ++++++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/tfjs-layers/src/engine/training_tensors.ts b/tfjs-layers/src/engine/training_tensors.ts index 7d721b9b6ee..77abf5c0611 100644 --- a/tfjs-layers/src/engine/training_tensors.ts +++ b/tfjs-layers/src/engine/training_tensors.ts @@ -423,8 +423,8 @@ export async function fitTensors( model.isTraining = true; let inputs: Tensor[]; let targets: Tensor[]; - // let originalInputs: Tensor[]; - // let originalTargets: Tensor[]; + let originalInputs: Tensor[]; + let originalTargets: Tensor[]; let inputValX: Tensor|Tensor[]; let inputValY: Tensor|Tensor[]; let valX: Tensor|Tensor[]; @@ -483,10 +483,10 @@ export async function fitTensors( Math.floor(inputs[0].shape[0] * (1 - args.validationSplit)); const originalBatchSize = inputs[0].shape[0]; valX = sliceArrays(inputs, splitAt, originalBatchSize) as Tensor[]; - // originalInputs = inputs; + originalInputs = inputs; inputs = sliceArrays(inputs, 0, splitAt) as Tensor[]; valY = sliceArrays(targets, splitAt, originalBatchSize) as Tensor[]; - // originalTargets = targets; + originalTargets = targets; targets = sliceArrays(targets, 0, splitAt) as Tensor[]; // TODO(cais): Once sampleWeights becomes available, slice it to get // valSampleWeights. @@ -541,8 +541,8 @@ export async function fitTensors( // Memory clean up. disposeNewTensors(inputs, x); disposeNewTensors(targets, y); - // disposeNewTensors(originalInputs, x); - // disposeNewTensors(originalTargets, y); + disposeNewTensors(originalInputs, x); + disposeNewTensors(originalTargets, y); disposeNewTensors(valX as Tensor[], inputValX); disposeNewTensors(valY as Tensor[], inputValY); if (sampleWeights != null) { diff --git a/tfjs-layers/src/engine/training_test.ts b/tfjs-layers/src/engine/training_test.ts index aaa500633e8..48dfc69d0cd 100644 --- a/tfjs-layers/src/engine/training_test.ts +++ b/tfjs-layers/src/engine/training_test.ts @@ -2006,15 +2006,15 @@ describeMathCPUAndWebGL2('LayersModel.fit: No memory leak', () => { const validationSplit = 0.4; targets = ones([numSamples]); model.compile({optimizer: 'SGD', loss: 'meanSquaredError'}); + const numTensors0 = memory().numTensors; // Use batchSize === numSamples to get exactly one batch. await model.fit( inputs, targets, - {batchSize: numSamples, epochs: 1, validationSplit}); - const numTensors0 = memory().numTensors; + {batchSize: 2, epochs: 10, validationSplit, shuffle: true}); for (let i = 0; i < 2; ++i) { await model.fit( inputs, targets, - {batchSize: numSamples, epochs: 1, validationSplit}); + {batchSize: 2, epochs: 10, validationSplit, shuffle: true}); const numTensorsNow = memory().numTensors; if (numTensorsNow > numTensors0) { done.fail( diff --git a/tfjs-layers/src/utils/test_utils.ts b/tfjs-layers/src/utils/test_utils.ts index 46848c78d46..98289759b12 100644 --- a/tfjs-layers/src/utils/test_utils.ts +++ b/tfjs-layers/src/utils/test_utils.ts @@ -95,7 +95,10 @@ export function describeMathCPUAndGPU(testName: string, tests: () => void) { */ export function describeMathCPUAndWebGL2(testName: string, tests: () => void) { describeWithFlags( - testName, {predicate: testEnv => testEnv.flags['WEBGL_VERSION'] !== 1}, + testName, { + predicate: testEnv => + (testEnv.flags == null || testEnv.flags['WEBGL_VERSION'] === 2) + }, () => { tests(); }); @@ -134,7 +137,8 @@ export function describeMathWebGL2(testName: string, tests: () => void) { describeWithFlags( testName, { predicate: testEnv => testEnv.backendName === 'webgl' && - testEnv.flags['WEBGL_VERSION'] !== 1 + (testEnv.flags == null || testEnv.flags['WEBGL_VERSION'] === 2) + }, () => { tests();