Added Bounding-Box tool for Image Component #3220

Status: Closed. Wants to merge 9 commits.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -120,6 +120,7 @@ No changes to highlight.
* Raises a `gr.Error` instead of a regular Python error when you use `gr.Interface.load()` to load a model and there's an error querying the HF API by [@abidlabs](https://github.com/abidlabs) in [PR 3194](https://github.com/gradio-app/gradio/pull/3194)
* Fixed gradio share links so that they are persistent and do not reset if the network connection is disrupted by [XciD](https://github.com/XciD), [Wauplin](https://github.com/Wauplin), and [@abidlabs](https://github.com/abidlabs) in [PR 3149](https://github.com/gradio-app/gradio/pull/3149) and a follow-up to allow it to work for users upgrading from a previous Gradio version in [PR 3221](https://github.com/gradio-app/gradio/pull/3221)
* Added a bounding-box tool for image components by [@CtrlAltDeplete](https://github.com/CtrlAltDeplete) in [PR 3220](https://github.com/gradio-app/gradio/pull/3220).

## Contributors Shoutout:
No changes to highlight.
1 change: 1 addition & 0 deletions demo/image_boxes/README.md
@@ -0,0 +1 @@
A simple example showing how to use the bounding-box tool for the Image input component.
64 changes: 64 additions & 0 deletions demo/image_boxes/run.ipynb
@@ -0,0 +1,64 @@
{
"cells": [
{
"cell_type": "markdown",
"id": 302934307671667531413257853548643485645,
"metadata": {},
"source": [
"# Gradio Demo: image_boxes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": 272996653310673477252411125948039410165,
"metadata": {},
"outputs": [],
"source": [
"!pip install -q gradio "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": 288918539441861185822528903084949547379,
"metadata": {},
"outputs": [],
"source": [
"# Downloading files from the demo repo\n",
"import os\n",
"!wget -q https://github.com/gradio-app/gradio/raw/main/demo/image_boxes/README.md"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": 44380577570523278879349135829904343037,
"metadata": {},
"outputs": [],
"source": [
"import gradio as gr\n",
"\n",
"\n",
"def predict(inp):\n",
" image = inp['image']\n",
" boxes = inp['mask']\n",
"\n",
" sub_images = []\n",
" for box in boxes:\n",
" sub_images.append(image.crop(box))\n",
" return sub_images\n",
"\n",
"\n",
"demo = gr.Interface(fn=predict,\n",
" inputs=gr.Image(tool=\"boxes\", type=\"pil\"),\n",
" outputs=gr.Gallery())\n",
"\n",
"demo.launch()\n"
]
}
],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
18 changes: 18 additions & 0 deletions demo/image_boxes/run.py
@@ -0,0 +1,18 @@
import gradio as gr


def predict(inp):
    image = inp["image"]
    boxes = inp["boxes"]

    sub_images = []
    for box in boxes:
        sub_images.append(image.crop(box))
    return sub_images


demo = gr.Interface(fn=predict,
inputs=gr.Image(tool="boxes", type="pil"),
outputs=gr.Gallery())

demo.launch()
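Each box arriving in the demo above is a set of four pixel coordinates (left, upper, right, lower), which is exactly what `PIL.Image.crop` consumes. The cropping step can be sketched in isolation, without Gradio; the image and box coordinates below are illustrative:

```python
from PIL import Image

def crop_boxes(image, boxes):
    """Crop each (left, upper, right, lower) box out of a PIL image."""
    return [image.crop(tuple(box)) for box in boxes]

# Illustrative input: a solid 100x100 image and two hypothetical boxes.
img = Image.new("RGB", (100, 100), color=(255, 0, 0))
crops = crop_boxes(img, [[0, 0, 50, 50], [25, 25, 75, 100]])
print([c.size for c in crops])  # → [(50, 50), (50, 75)]
```

Note that `crop` yields a sub-image of size (right - left, lower - upper), so out-of-order or out-of-range coordinates from the frontend would need validation before cropping.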
20 changes: 17 additions & 3 deletions gradio/components.py
@@ -1364,7 +1364,7 @@ def __init__(
image_mode: "RGB" if color, or "L" if black and white.
invert_colors: whether to invert the image as a preprocessing step.
source: Source of image. "upload" creates a box where user can drop an image file, "webcam" allows user to take snapshot from their webcam, "canvas" defaults to a white image that can be edited and drawn upon with tools.
tool: Tools used for editing. "editor" allows a full screen editor (and is the default if source is "upload" or "webcam"), "select" provides a cropping and zoom tool, "sketch" allows you to create a binary sketch (and is the default if source="canvas"), and "color-sketch" allows you to created a sketch in different colors. "color-sketch" can be used with source="upload" or "webcam" to allow sketching on an image. "sketch" can also be used with "upload" or "webcam" to create a mask over an image and in that case both the image and mask are passed into the function as a dictionary with keys "image" and "mask" respectively.
tool: Tools used for editing. "editor" allows a full screen editor (and is the default if source is "upload" or "webcam"), "select" provides a cropping and zoom tool, "boxes" provides a box-selection tool to create bounding boxes, "sketch" allows you to create a binary sketch (and is the default if source="canvas"), and "color-sketch" allows you to create a sketch in different colors. "color-sketch" can be used with source="upload" or "webcam" to allow sketching on an image. "sketch" can also be used with "upload" or "webcam" to create a mask over an image and in that case both the image and mask are passed into the function as a dictionary with keys "image" and "mask" respectively.
type: The format the image is converted to before being passed into the prediction function. "numpy" converts the image to a numpy array with shape (width, height, 3) and values from 0 to 255, "pil" converts the image to a PIL image object, "filepath" passes a str path to a temporary file containing the image.
label: component name in interface.
every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open. Has no effect otherwise. Queue must be enabled. The event can be accessed (e.g. to cancel it) via this component's .load_event attribute.
@@ -1472,11 +1472,13 @@ def generate_sample(self):
return deepcopy(media_data.BASE64_IMAGE)

def preprocess(
self, x: str | Dict[str, str]
self, x: str | Dict[str, str] | Dict[str, str | List[List[float]]]
) -> np.ndarray | _Image.Image | str | Dict | None:
"""
Parameters:
x: base64 url data, or (if tool == "sketch") a dict of image and mask base64 url data
x: base64 url data
(if tool == "sketch") a dict of image and mask base64 url data
(if tool == "boxes") a dict of image and bounding boxes
Returns:
image in requested format, or (if tool == "sketch") a dict of image and mask in requested format, or (if tool == "boxes") a dict of image and bounding boxes
"""
@@ -1486,8 +1488,17 @@ def preprocess(
mask = ""
if self.tool == "sketch" and self.source in ["upload", "webcam"]:
assert isinstance(x, dict)
assert isinstance(x["image"], str)
assert isinstance(x["mask"], str)
x, mask = x["image"], x["mask"]

boxes = []
if self.tool == "boxes" and self.source in ["upload", "webcam"]:
assert isinstance(x, dict)
assert isinstance(x["image"], str)
assert isinstance(x["boxes"], list)
x, boxes = x["image"], x["boxes"]

assert isinstance(x, str)
im = processing_utils.decode_base64_to_image(x)
with warnings.catch_warnings():
@@ -1511,6 +1522,9 @@ def preprocess(
"mask": self._format_image(mask_im),
}

if self.tool == "boxes" and self.source in ["upload", "webcam"]:
return {"image": self._format_image(im), "boxes": boxes}

return self._format_image(im)

def postprocess(
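The branching added to `preprocess` above follows one pattern: depending on `tool`, the frontend payload is either a bare base64 string or a dict carrying an extra channel ("mask" for sketch, "boxes" for the new tool). A simplified, Gradio-free sketch of that dispatch (function name and payload values are illustrative, not Gradio's actual code):

```python
def split_payload(x, tool):
    """Separate the base64 image from any extra channel, mirroring the
    branching in Image.preprocess (simplified sketch)."""
    if tool == "sketch":
        assert isinstance(x, dict)
        return x["image"], {"mask": x["mask"]}
    if tool == "boxes":
        assert isinstance(x, dict)
        return x["image"], {"boxes": x["boxes"]}
    # Default tools send the image as a bare base64 string.
    assert isinstance(x, str)
    return x, {}

# Hypothetical "boxes" payload as the frontend would send it.
payload = {"image": "data:image/png;base64,...", "boxes": [[0, 0, 10, 10]]}
img, extra = split_payload(payload, "boxes")
print(extra)  # → {'boxes': [[0, 0, 10, 10]]}
```

Keeping the extra channel in a dict keyed by tool name is what lets the user function receive `{"image": ..., "boxes": ...}` symmetrically with the existing `{"image": ..., "mask": ...}` sketch contract.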