chore: Prepare for release

imgly · May 10, 2024 · 8b26f6f · 8b26f6f
1 parent d2a35bc
commit 8b26f6f
Show file tree

Hide file tree

Showing 19 changed files with 1,114 additions and 356 deletions.
diff --git a/.nvmrc b/.nvmrc
@@ -0,0 +1 @@
+v20.13.1
diff --git a/bundle/models/vitmatte_s b/bundle/models/vitmatte_s
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -31,6 +31,7 @@
     "dotenv": "~16.3.1",
     "ejs": "~3.1.9",
     "es-main": "~1.3.0",
+    "esbuild": "^0.21.1",
     "glob": "~10.3.3",
     "husky": "^9.0.11",
     "lint-staged": "^15.2.2",

diff --git a/packages/node-examples/src/example_001.cjs b/packages/node-examples/src/example_001.cjs
@@ -28,10 +28,6 @@ async function run() {
     },
     // model: 'small',
     model: 'isnet',
-    // model: 'large',
-    // model: 'modnet',
-    // model: 'modnet_fp16',
-    // model: 'modnet_quint8',
     output: {
       quality: 0.8,
       format: 'image/webp' //image/jpeg, image/webp

diff --git a/packages/node/CHANGELOG.md b/packages/node/CHANGELOG.md
@@ -8,8 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ### Added
 
-- Added Modnet models
-
 - Added isnet model for webgpu
 
 ## [1.4.5]

diff --git a/packages/node/ThirdPartyLicenses.json b/packages/node/ThirdPartyLicenses.json
@@ -8,10 +8,5 @@
     "source": "https://github.com/xuebinqin/DIS",
     "type": "model",
     "license": "MIT"
-  },
-  "Modnet": {
-    "source": "https://github.com/ZHKKKe/MODNet",
-    "type": "model",
-    "license": "Apache-2.0"
   }
 }
diff --git a/packages/node/changelog/Unreleased/20242403201448-Added_Modnet_models_Added.yaml b/packages/node/changelog/Unreleased/20242403201448-Added_Modnet_models_Added.yaml
diff --git a/packages/node/src/codecs.ts b/packages/node/src/codecs.ts
@@ -29,6 +29,7 @@ async function imageDecode(blob: Blob): Promise<NdArray<Uint8Array>> {
     case 'application/octet-stream':
     case `image/png`:
     case `image/jpeg`:
+    case `image/jpg`:
     case `image/webp`: {
       const decoded = sharp(buffer);
       let { width, height, channels } = await decoded.metadata();

diff --git a/packages/node/src/schema.ts b/packages/node/src/schema.ts
@@ -59,13 +59,7 @@ const ConfigSchema = z
               return val;
           }
         },
-        z.enum([
-          'isnet',
-          'isnet_fp16',
-          'isnet_quint8',
-          'modnet',
-          'modnet_fp16' /*, 'modnet_quint8'*/
-        ])
+        z.enum(['isnet', 'isnet_fp16', 'isnet_quint8'])
       )
       .default('medium'),
     output: z

diff --git a/packages/web-data/ThirdPartyLicenses.json b/packages/web-data/ThirdPartyLicenses.json
@@ -8,10 +8,5 @@
     "source": "https://github.com/xuebinqin/DIS",
     "type": "model",
     "license": "MIT"
-  },
-  "Modnet": {
-    "source": "https://github.com/ZHKKKe/MODNet",
-    "type": "model",
-    "license": "Apache-2.0"
   }
 }
diff --git a/packages/web-examples/vite-project/src/App.vue b/packages/web-examples/vite-project/src/App.vue
@@ -1,5 +1,3 @@
-<!--Try this http://localhost:5173/?auto=1&image=https://images.unsplash.com/photo-1709248835088-03bb0946d6ab -->
-http://localhost:5173/?auto=1&image=http://localhost:5173/images/tile_0.webp
 <script>
 import { ref, watch, onMounted, onUnmounted } from 'vue';
 
@@ -8,15 +6,16 @@ import {
   removeBackground,
   removeForeground,
   segmentForeground,
+  alphamask,
   applySegmentationMask
 } from '@imgly/background-removal';
 
 export default {
   name: 'App',
   setup() {
     const images = [
-      // 'https://images.unsplash.com/photo-1656408308602-05835d990fb1?q=80&w=3200&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D',
-      // 'https://images.unsplash.com/photo-1686002359940-6a51b0d64f68?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1024&q=80',
+      'https://images.unsplash.com/photo-1656408308602-05835d990fb1?q=80&w=3200&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D',
+      'https://images.unsplash.com/photo-1686002359940-6a51b0d64f68?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1024&q=80',
       'https://images.unsplash.com/photo-1590523278191-995cbcda646b?ixlib=rb-1.2.1&q=80&fm=jpg&crop=entropy&cs=tinysrgb&w=1080&fit=max&ixid=eyJhcHBfaWQiOjEyMDd9',
       'https://images.unsplash.com/photo-1709248835088-03bb0946d6ab?q=80&w=3387&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'
     ];
@@ -48,14 +47,13 @@ export default {
           0
         )}%`;
       },
-      device: 'gpu',
+      // rescale: false,
+      rescale: true,
+      // device: 'gpu',
       // device: 'cpu',
-      // model: 'isnet',
+      model: 'isnet',
       // model: 'isnet_fp16',
       // model: 'isnet_quint8',
-      // model: 'modnet',
-      // model: 'modnet_fp16', //# does not work on webgpu
-      // model: 'modnet_quint8',
       output: {
         quality: 0.8,
         format: 'image/png'
@@ -123,6 +121,8 @@ export default {
 
       imageUrl.value = randomImage;
       const imageBlob = await removeBackground(randomImage, config);
+      // const imageBlob = await alphamask(randomImage, config)
+      // const maskBlob = await trimap(randomImage, config)
       // const imageBlob = await removeForeground(randomImage, config);
       // const imageBlob = await segmentForeground(randomImage, config);
       // const maskBlob = await segmentForeground(randomImage, {...config}); // use this format for maximum efficiency

diff --git a/packages/web/CHANGELOG.md b/packages/web/CHANGELOG.md
@@ -8,8 +8,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ### Added
 
-- Added Modnet models Added
-
 - Added option to execute on gpu (webgpu) and cpu
 
 - Added isnet model for webgpu

diff --git a/packages/web/changelog/Unreleased/20242403201442-Added_Modnet_models_Added.yaml b/packages/web/changelog/Unreleased/20242403201442-Added_Modnet_models_Added.yaml
diff --git a/packages/web/src/codecs.ts b/packages/web/src/codecs.ts
@@ -27,6 +27,7 @@ async function imageDecode(blob: Blob): Promise<NdArray<Uint8Array>> {
     case 'application/octet-stream': // this is an unknown type
     case `image/png`:
     case `image/jpeg`:
+    case `image/jpg`:
     case `image/webp`: {
       const imageBitmap = await createImageBitmap(blob);
       const imageData = imageBitmapToImageData(imageBitmap);

diff --git a/packages/web/src/index.ts b/packages/web/src/index.ts
@@ -3,6 +3,7 @@ export {
   preload,
   removeBackground,
   removeForeground,
+  alphamask,
   segmentForeground,
   applySegmentationMask
 };
@@ -11,10 +12,9 @@ export type { Config, ImageSource };
 import lodash from 'lodash';
 const { memoize } = lodash; //fixme with `lodash-es`
 
-import ndarray from 'ndarray';
 import { initInference, runInference } from './inference';
 import { preload as preloadResources } from './resource';
-import { Config, validateConfig } from './schema';
+import { Config, ConfigSchema, validateConfig } from './schema';
 import * as utils from './utils';
 import { ImageSource } from './utils';
 
@@ -41,13 +41,19 @@ async function removeBackground(
 
   if (config.progress) config.progress('compute:decode', 0, 4);
 
-  const imageTensor = await utils.imageSourceToImageData(image, config);
-  const [width, height, channels] = imageTensor.shape;
+  const inputImageTensor = await utils.imageSourceToImageData(image, config);
+
   config.progress?.('compute:inference', 1, 4);
-  const alphamask = await runInference(imageTensor, config, session);
-  const stride = width * height;
+  const [alphamask, imageTensor] = await runInference(
+    inputImageTensor,
+    config,
+    session
+  );
+
   config.progress?.('compute:mask', 2, 4);
   const outImageTensor = imageTensor;
+  const [width, height] = outImageTensor.shape;
+  const stride = width * height;
   for (let i = 0; i < stride; i += 1) {
     outImageTensor.data[4 * i + 3] = alphamask.data[i];
   }
@@ -76,11 +82,15 @@ async function removeForeground(
   const { config, session } = await init(configuration);
 
   const imageTensor = await utils.imageSourceToImageData(image, config);
-  const [width, height, channels] = imageTensor.shape;
+  const [alphamask, imageInput] = await runInference(
+    imageTensor,
+    config,
+    session
+  );
 
-  const alphamask = await runInference(imageTensor, config, session);
+  const outImageTensor = imageInput;
+  const [width, height, channels] = outImageTensor.shape;
   const stride = width * height;
-  const outImageTensor = imageTensor;
   for (let i = 0; i < stride; i += 1) {
     outImageTensor.data[4 * i + 3] = 255 - alphamask.data[i];
   }
@@ -101,48 +111,42 @@ async function removeForeground(
  * @param configuration - The optional configuration for the segmentation.
  * @returns A Promise that resolves to the segmented foreground as a Blob.
  */
+
+const alphamask = segmentForeground;
 async function segmentForeground(
   image: ImageSource,
   configuration?: Config
 ): Promise<Blob> {
   const { config, session } = await init(configuration);
 
   const imageTensor = await utils.imageSourceToImageData(image, config);
-  const [height, width, channels] = imageTensor.shape;
-
-  const alphamask = await runInference(imageTensor, config, session);
-
-  switch (config.output.format) {
-    case 'image/x-alpha8': {
-      const outImage = await utils.imageEncode(
-        alphamask,
-        config.output.quality,
-        config.output.format
-      );
-      return outImage;
-    }
-    default: {
-      const stride = width * height;
-      const outImageTensor = ndarray(new Uint8Array(channels * stride), [
-        height,
-        width,
-        channels
-      ]);
-      for (let i = 0; i < stride; i += 1) {
-        const index = 4 * i + 3;
-        outImageTensor.data[index] = alphamask.data[i]; //Red
-        outImageTensor.data[index + 1] = alphamask.data[i]; //Green
-        outImageTensor.data[index + 2] = alphamask.data[i]; // Blue
-        outImageTensor.data[index + 3] = 255;
-      }
-      const outImage = await utils.imageEncode(
-        outImageTensor,
-        config.output.quality,
-        config.output.format
-      );
-      return outImage;
-    }
+  let [height, width, channels] = imageTensor.shape;
+
+  const [alphamask, imageInput] = await runInference(
+    imageTensor,
+    config,
+    session
+  );
+
+  const stride = width * height;
+  const outImageTensor = imageTensor;
+  for (let i = 0; i < stride; i += 1) {
+    const index = 4 * i;
+
+    let alpha = alphamask.data[i];
+
+    outImageTensor.data[index] = 255;
+    outImageTensor.data[index + 1] = 255;
+    outImageTensor.data[index + 2] = 255;
+    outImageTensor.data[index + 3] = alpha;
   }
+
+  const outImage = await utils.imageEncode(
+    outImageTensor,
+    config.output.quality,
+    config.output.format
+  );
+  return outImage;
 }
 
 async function applySegmentationMask(

diff --git a/packages/web/src/inference.ts b/packages/web/src/inference.ts
@@ -8,57 +8,60 @@ import { loadAsBlob } from './resource';
 import ndarray, { NdArray } from 'ndarray';
 import { convertFloat32ToUint8 } from './utils';
 
-async function initInference(
-  config?: Config
-): Promise<{ config: Config; session: unknown }> {
-  config = validateConfig(config);
-
+async function initBase(config: Config): Promise<unknown> {
   if (config.debug) console.debug('Loading model...', config.model);
   const model = config.model;
   const blob = await loadAsBlob(`/models/${model}`, config);
   const arrayBuffer = await blob.arrayBuffer();
   const session = await createOnnxSession(arrayBuffer, config);
-  return { config, session };
+  return session;
+}
+
+async function initInference(
+  config?: Config
+): Promise<{ config: Config; session: { base: unknown } }> {
+  config = validateConfig(config);
+  const base = await initBase(config);
+  return { config, session: { base } };
 }
 
 async function runInference(
   imageTensor: NdArray<Uint8Array>,
   config: Config,
-  session: any
-): Promise<NdArray<Uint8Array>> {
+  session: { base: unknown }
+): Promise<[NdArray<Uint8Array>, NdArray<Uint8Array>]> {
   const resolution = 1024;
   const [srcHeight, srcWidth, srcChannels] = imageTensor.shape;
-  const proportional = true;
-  let tensorImage = tensorResizeBilinear(
+  const keepAspect = false;
+
+  let resizedImageTensor = tensorResizeBilinear(
     imageTensor,
     resolution,
     resolution,
-    proportional
+    keepAspect
   );
-  const inputTensor = tensorHWCtoBCHW(tensorImage); // this converts also from float to rgba
 
-  const predictionsDict = await runOnnxSession(
-    session,
+  const inputTensor = tensorHWCtoBCHW(resizedImageTensor); // this converts also from float to rgba
+
+  let predictionsDict = await runOnnxSession(
+    session.base,
     [['input', inputTensor]],
     ['output'],
     config
   );
 
   let alphamask = ndarray(predictionsDict[0].data, [resolution, resolution, 1]);
-  // alphamask = tensorResizeBilinear(
-  //   alphamask,
-  //   srcWidth,
-  //   srcHeight,
-  //   proportional
-  // );
 
   let alphamaskU8 = convertFloat32ToUint8(alphamask);
-  alphamaskU8 = tensorResizeBilinear(
-    alphamaskU8,
-    srcWidth,
-    srcHeight,
-    proportional
-  );
-
-  return alphamaskU8;
+  if (config.rescale) {
+    alphamaskU8 = tensorResizeBilinear(
+      alphamaskU8,
+      srcWidth,
+      srcHeight,
+      keepAspect
+    );
+    return [alphamaskU8, imageTensor];
+  } else {
+    return [alphamaskU8, resizedImageTensor];
+  }
 }