Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an API to keep data on GPU: dataToGPU #5953

Merged
merged 28 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 73 additions & 8 deletions tfjs-backend-webgl/src/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import './flags_webgl';

import * as tf from '@tensorflow/tfjs-core';
import {backend_util, BackendValues, buffer, DataId, DataStorage, DataType, DataValues, engine, env, kernel_impls, KernelBackend, MemoryInfo, NumericDataType, Rank, RecursiveArray, scalar, ShapeMap, Tensor, Tensor2D, TensorBuffer, TensorInfo, tidy, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';
import {backend_util, BackendValues, buffer, DataId, DataStorage, DataToGPUWebGLOption, DataType, DataValues, engine, env, GPUData, kernel_impls, KernelBackend, MemoryInfo, NumericDataType, Rank, RecursiveArray, scalar, ShapeMap, Tensor, Tensor2D, TensorBuffer, TensorInfo, tidy, TimingInfo, TypedArray, util} from '@tensorflow/tfjs-core';

import {getWebGLContext} from './canvas_util';
import {DecodeMatrixProgram} from './decode_matrix_gpu';
Expand Down Expand Up @@ -387,6 +387,57 @@ export class MathBackendWebGL extends KernelBackend {
return dTypeVals;
}

/**
 * Read tensor to a new texture that is densely packed (four channels per
 * texel) for ease of use.
 * @param dataId The source tensor.
 * @param options
 *     customTexShape: Optional. If set, will use the user defined texture
 *     shape to create the texture.
 * @returns GPUData holding the dense texture plus a `tensorRef` the caller
 *     must dispose to release the GPU memory.
 * @throws Error for complex64 tensors (their data spans two textures) and
 *     when the tensor has no texture on the GPU.
 */
readToGPU(dataId: DataId, options: DataToGPUWebGLOption = {}): GPUData {
  const texData = this.texData.get(dataId);
  const {values, shape, slice, dtype, isPacked, texture} = texData;

  if (dtype === 'complex64') {
    throw new Error('Does not support reading texture for complex64 dtype.');
  }

  // The presence of `slice` indicates this tensor is a shallow slice of a
  // different tensor, and is using that original tensor's texture. Run
  // `clone` in order to copy that texture and read from it.
  if (slice != null) {
    let program;
    if (isPacked) {
      program = new UnaryOpPackedProgram(shape, unary_op.CLONE);
    } else {
      program = new UnaryOpProgram(shape, unary_op.CLONE);
    }
    const res =
        this.runWebGLProgram(program, [{dataId, shape, dtype}], dtype);
    // Recurse on the cloned (slice-free) tensor, then free the clone's
    // intermediate bookkeeping; the returned tensorRef keeps the texture
    // alive.
    const gpuResource = this.readToGPU(res, options);
    this.disposeIntermediateTensorInfo(res);
    return gpuResource;
  }

  if (texture == null) {
    if (values != null) {
      throw new Error('Data is not on GPU but on CPU.');
    } else {
      throw new Error('There is no data on GPU or CPU.');
    }
  }

  // Decode the texture so that it is stored densely (using four channels).
  const tmpTarget = this.decode(dataId, options.customTexShape);

  // Make engine track this tensor, so that we can dispose it later.
  const tensorRef = engine().makeTensorFromDataId(
      tmpTarget.dataId, tmpTarget.shape, tmpTarget.dtype);

  const tmpData = this.texData.get(tmpTarget.dataId);
  return {tensorRef, ...tmpData.texture};
}

bufferSync<R extends Rank>(t: TensorInfo): TensorBuffer<R> {
const data = this.readSync(t.dataId);
let decodedData = data as DataValues;
Expand Down Expand Up @@ -753,37 +804,50 @@ export class MathBackendWebGL extends KernelBackend {
return {dataId: output.dataId, shape: afterShape, dtype: output.dtype};
}

/**
 * Decode the tensor's texture into a new, densely packed texture (four
 * values per texel).
 * @param dataId The source tensor.
 * @param customTexShape Optional [rows, columns] shape for the output
 *     texture. When provided it must be large enough to hold the tensor's
 *     data; otherwise the default dense shape is derived from the tensor.
 */
private decode(dataId: DataId, customTexShape?: [number, number]):
    TensorInfo {
  const texData = this.texData.get(dataId);
  const {isPacked, shape, dtype} = texData;
  // A user-supplied texture shape must be able to hold every element:
  // each texel stores four values, hence rows * columns * 4.
  if (customTexShape != null) {
    const size = util.sizeFromShape(shape);
    const texSize = customTexShape[0] * customTexShape[1] * 4;
    util.assert(
        size <= texSize,
        () => 'customTexShape is too small. ' +
            'Row * Column * 4 should be equal or larger than the ' +
            'size of the tensor data.');
  }
  const shapeAs3D =
      webgl_util.getShapeAs3D(shape) as [number, number, number];
  let program;
  if (isPacked) {
    program = new DecodeMatrixPackedProgram(shapeAs3D);
  } else {
    program = new DecodeMatrixProgram(shapeAs3D);
  }
  const preventEagerUnpackingOfOutput = true;
  // Prefer the user-defined texture shape; otherwise fall back to the
  // default dense shape computed from the 3D view of the tensor.
  const customValues =
      [customTexShape != null ? customTexShape :
                                tex_util.getDenseTexShape(shapeAs3D)];
  const out = this.runWebGLProgram(
      program, [{shape: shapeAs3D, dtype, dataId}], dtype, customValues,
      preventEagerUnpackingOfOutput, customTexShape);
  return {dtype, shape, dataId: out.dataId};
}

runWebGLProgram(
program: GPGPUProgram, inputs: TensorInfo[], outputDtype: DataType,
customUniformValues?: number[][],
preventEagerUnpackingOfOutput = false): TensorInfo {
customUniformValues?: number[][], preventEagerUnpackingOfOutput = false,
customTexShape?: [number, number]): TensorInfo {
const output = this.makeTensorInfo(program.outputShape, outputDtype);
const outData = this.texData.get(output.dataId);
if (program.packedOutput) {
outData.isPacked = true;
}
if (program.outPackingScheme === tex_util.PackingScheme.DENSE) {
const texelShape = tex_util.getDenseTexShape(program.outputShape);
const texelShape = customTexShape != null ?
customTexShape :
tex_util.getDenseTexShape(program.outputShape);
// For a densely packed output, we explicitly set texShape
// so it doesn't get assigned later according to our typical packing
// scheme wherein a single texel can only contain values from adjacent
Expand All @@ -793,6 +857,7 @@ export class MathBackendWebGL extends KernelBackend {
if (program.outTexUsage != null) {
outData.usage = program.outTexUsage;
}

if (util.sizeFromShape(output.shape) === 0) {
// Short-circuit the computation since the result is empty (has 0 in its
// shape).
Expand Down
247 changes: 246 additions & 1 deletion tfjs-backend-webgl/src/backend_webgl_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
*/

import * as tf from '@tensorflow/tfjs-core';
import {engine, test_util, util} from '@tensorflow/tfjs-core';
import {engine, GPUData, test_util, util} from '@tensorflow/tfjs-core';
// tslint:disable-next-line: no-imports-from-dist
import {describeWithFlags} from '@tensorflow/tfjs-core/dist/jasmine_util';

const {expectArraysClose, expectArraysEqual} = test_util;
const {decodeString} = util;

Expand Down Expand Up @@ -679,6 +680,250 @@ describeWithFlags('time webgl', WEBGL_ENVS, () => {
});
});

// Tests for dataToGPU() on WebGL2: the returned GPUData must expose a valid
// dense texture, honor a user-supplied texShape, and not leak tensors,
// data buckets, or GPU bytes.
describeWithFlags('keeping data on gpu ', WEBGL2_ENVS, () => {
  // Saved value of WEBGL_CPU_FORWARD so it can be restored after the suite.
  let flag: boolean;

  beforeAll(() => {
    // Disable CPU forwarding so ops actually run on (and leave data on)
    // the GPU; otherwise small tensors may be computed on the CPU.
    flag = tf.env().getBool('WEBGL_CPU_FORWARD');
    tf.env().set('WEBGL_CPU_FORWARD', false);
  });

  afterAll(() => {
    tf.env().set('WEBGL_CPU_FORWARD', flag);
  });

  it('has a valid texture for dtype=float32.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    // Expected default dense texture shape for 12 elements (2*2 texels * 4
    // channels = 16 slots >= 12).
    const texShape = [2, 2];
    const a = tf.tensor(data, [1, 3, 4]);
    // tf.add(a, 0) forces the data onto the GPU as an op output.
    const b = tf.add(a, 0);
    const res = b.dataToGPU();
    expectArraysEqual(res.texShape, texShape);

    // Download the texture contents through a GPU buffer and verify the
    // values round-trip unchanged.
    const webGLBackend = tf.backend() as MathBackendWebGL;
    const buffer = webGLBackend.gpgpu.createBufferFromTexture(
        res.texture, res.texShape[0], res.texShape[1]);
    const vals = webGLBackend.gpgpu.downloadFloat32MatrixFromBuffer(buffer, 12);
    expectArraysEqual(vals, data);
  });

  it('uses user defined texShape.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);
    // 1*3 texels * 4 channels = 12 slots, exactly the data size.
    const texShape = [1, 3] as [number, number];
    const res = b.dataToGPU({customTexShape: texShape});
    expectArraysEqual(res.texShape, texShape);

    const webGLBackend = tf.backend() as MathBackendWebGL;
    const buffer = webGLBackend.gpgpu.createBufferFromTexture(
        res.texture, res.texShape[0], res.texShape[1]);
    const vals = webGLBackend.gpgpu.downloadFloat32MatrixFromBuffer(buffer, 12);
    expectArraysEqual(vals, data);
  });

  it('has a valid texture for dtype=int32.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const texShape = [2, 2];
    const a = tf.tensor(data, [1, 3, 4], 'int32');
    const b = tf.add(a, 0);
    const res = b.dataToGPU();
    expectArraysEqual(res.texShape, texShape);

    const webGLBackend = tf.backend() as MathBackendWebGL;

    const buffer = webGLBackend.gpgpu.createBufferFromTexture(
        res.texture, res.texShape[0], res.texShape[1]);
    const vals = webGLBackend.gpgpu.downloadFloat32MatrixFromBuffer(buffer, 12);

    expectArraysEqual(vals, data);
  });

  it('has no memory leak.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);

    // Snapshot GPU bytes, tensor count, and data-bucket count before
    // dataToGPU, then verify disposing tensorRef returns to the baseline.
    const webGLBackend = tf.backend() as MathBackendWebGL;
    const startNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
    const startTensor = tf.memory().numTensors;
    const startDataBuckets = webGLBackend.numDataIds();

    const res = b.dataToGPU();
    res.tensorRef.dispose();

    const endNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
    const endTensor = tf.memory().numTensors;
    const endDataBuckets = webGLBackend.numDataIds();

    expect(endNumBytes).toEqual(startNumBytes);
    expect(endTensor).toEqual(startTensor);
    expect(endDataBuckets).toEqual(startDataBuckets);
  });

  it('can be used in tidy.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

    const webGLBackend = tf.backend() as MathBackendWebGL;
    const startTensor = tf.memory().numTensors;
    const startDataBuckets = webGLBackend.numDataIds();

    // Returning the GPUData (cast so tidy accepts it) from tidy should keep
    // exactly its tensorRef alive: one extra tensor and one extra bucket.
    const result = tf.tidy(() => {
      const a = tf.tensor(data, [1, 3, 4]);
      const b = tf.add(a, 0);
      return b.dataToGPU() as {} as tf.Tensor;
    });

    const endTensor = tf.memory().numTensors;
    const endDataBuckets = webGLBackend.numDataIds();

    expect(endTensor).toEqual(startTensor + 1);
    expect(endDataBuckets).toEqual(startDataBuckets + 1);

    // The surviving texture must still hold the original values.
    const res = result as {} as GPUData;
    const buffer = webGLBackend.gpgpu.createBufferFromTexture(
        res.texture, res.texShape[0], res.texShape[1]);
    const vals = webGLBackend.gpgpu.downloadFloat32MatrixFromBuffer(buffer, 12);
    expectArraysEqual(vals, data);
  });

  it('tidy has no memory leak.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

    const webGLBackend = tf.backend();
    const startTensor = tf.memory().numTensors;
    const startDataBuckets = webGLBackend.numDataIds();

    // The GPUData is NOT returned from tidy, so its texture should be
    // cleaned up; only `b` (the returned tensor) survives.
    tf.tidy(() => {
      const a = tf.tensor(data, [1, 3, 4]);
      const b = tf.add(a, 0);
      b.dataToGPU();
      return b;
    });

    const endTensor = tf.memory().numTensors;
    const endDataBuckets = webGLBackend.numDataIds();

    expect(endTensor).toEqual(startTensor + 1);
    expect(endDataBuckets).toEqual(startDataBuckets + 1);
  });

  it('throws error when user defined texShape is too small.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);

    // 1*1 texels * 4 channels = 4 slots < 12 elements -> must throw.
    expect(() => {
      b.dataToGPU({customTexShape: [1, 1]});
    }).toThrowError();
  });
});

// Tests for dataToGPU() on WebGL1. WebGL1 has no GPU buffer download path,
// so values are verified via downloadMatrixFromPackedTexture, guarded by
// WEBGL_DOWNLOAD_FLOAT_ENABLED.
describeWithFlags('keeping data on gpu ', WEBGL1_ENVS, () => {
  // Saved value of WEBGL_CPU_FORWARD so it can be restored after the suite.
  let flag: boolean;
  // Captured in beforeAll (not at describe-evaluation time) so the WEBGL1
  // backend selected by describeWithFlags is the one under test — matches
  // the WEBGL2 suite above, which also fetches the backend lazily.
  let webGLBackend: MathBackendWebGL;

  beforeAll(() => {
    webGLBackend = tf.backend() as MathBackendWebGL;
    // Disable CPU forwarding so ops actually run on (and leave data on)
    // the GPU; otherwise small tensors may be computed on the CPU.
    flag = tf.env().getBool('WEBGL_CPU_FORWARD');
    tf.env().set('WEBGL_CPU_FORWARD', false);
  });

  afterAll(() => {
    tf.env().set('WEBGL_CPU_FORWARD', flag);
  });

  it('has a valid texture for dtype=float32.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    // Expected default dense texture shape for 12 elements (2*2 texels * 4
    // channels = 16 slots >= 12).
    const texShape = [2, 2];
    const size = 12;
    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);
    const res = b.dataToGPU();
    expectArraysEqual(res.texShape, texShape);

    if (tf.env().getBool('WEBGL_DOWNLOAD_FLOAT_ENABLED')) {
      const tmpData = webGLBackend.texData.get(res.tensorRef.dataId);
      // The dense texture may hold padding beyond the tensor's size, so
      // only the first `size` values are compared.
      const vals = webGLBackend.gpgpu
                       .downloadMatrixFromPackedTexture(
                           tmpData.texture.texture, ...tmpData.texture.texShape)
                       .subarray(0, size);
      expectArraysEqual(vals, data);
    }
  });

  it('uses user defined texShape.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);
    // 1*3 texels * 4 channels = 12 slots, exactly the data size.
    const texShape = [1, 3] as [number, number];
    const size = 12;
    const res = b.dataToGPU({customTexShape: texShape});
    expectArraysEqual(res.texShape, texShape);

    if (tf.env().getBool('WEBGL_DOWNLOAD_FLOAT_ENABLED')) {
      const tmpData = webGLBackend.texData.get(res.tensorRef.dataId);
      const vals = webGLBackend.gpgpu
                       .downloadMatrixFromPackedTexture(
                           tmpData.texture.texture, ...tmpData.texture.texShape)
                       .subarray(0, size);
      expectArraysEqual(vals, data);
    }
  });

  it('has a valid texture for dtype=int32.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const texShape = [2, 2];
    const size = 12;
    const a = tf.tensor(data, [1, 3, 4], 'int32');
    const b = tf.add(a, 0);
    const res = b.dataToGPU();
    expectArraysEqual(res.texShape, texShape);

    if (tf.env().getBool('WEBGL_DOWNLOAD_FLOAT_ENABLED')) {
      const tmpData = webGLBackend.texData.get(res.tensorRef.dataId);
      const vals = webGLBackend.gpgpu
                       .downloadMatrixFromPackedTexture(
                           tmpData.texture.texture, ...tmpData.texture.texShape)
                       .subarray(0, size);
      expectArraysEqual(vals, data);
    }
  });

  it('has no memory leak.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];

    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);

    // Snapshot GPU bytes, tensor count, and data-bucket count before
    // dataToGPU, then verify disposing tensorRef returns to the baseline.
    const startNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
    const startTensor = tf.memory().numTensors;
    const startDataBuckets = webGLBackend.numDataIds();

    const res = b.dataToGPU();
    res.tensorRef.dispose();

    const endNumBytes = (tf.memory() as WebGLMemoryInfo).numBytesInGPU;
    const endTensor = tf.memory().numTensors;
    const endDataBuckets = webGLBackend.numDataIds();

    expect(endNumBytes).toEqual(startNumBytes);
    expect(endTensor).toEqual(startTensor);
    expect(endDataBuckets).toEqual(startDataBuckets);
  });

  it('throws error when user defined texShape is too small.', () => {
    const data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
    const a = tf.tensor(data, [1, 3, 4]);
    const b = tf.add(a, 0);

    // 1*1 texels * 4 channels = 4 slots < 12 elements -> must throw.
    expect(() => {
      b.dataToGPU({customTexShape: [1, 1]});
    }).toThrowError();
  });
});

describeWithFlags('caching on cpu', WEBGL_ENVS, () => {
const customBackendName = 'cache-on-cpu';

Expand Down
Loading