From 945b5f723f9d412544f19aead67e7301e1a92ed4 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Mon, 7 Nov 2022 23:46:21 +0900
Subject: [PATCH 1/8] add notebooks for transform

---
 notebooks/05_transform.ipynb | 322 +++++++++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100644 notebooks/05_transform.ipynb
diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
new file mode 100644
index 0000000000..8780c8203d
--- /dev/null
+++ b/notebooks/05_transform.ipynb
@@ -0,0 +1,322 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "652f6b58",
+   "metadata": {},
+   "source": [
+    "# Transform datasets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ca821a19",
+   "metadata": {},
+   "source": [
+    "In this notebook example, we'll take a look at Datumaro transform api, where transform provides the task changes by modifying the annotation style, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "da198c67",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copyright (C) 2022 Intel Corporation\n",
+    "#\n",
+    "# SPDX-License-Identifier: MIT\n",
+    "\n",
+    "import os\n",
+    "import datumaro as dm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e2cf885",
+   "metadata": {},
+   "source": [
+    "### Filtered by subset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "031f1d62",
+   "metadata": {},
+   "source": [
+    "We export sample VOC dataset to filter only train subset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "id": "b9640838",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Representation for sample COCO dataset\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=123287\n",
+       "\tsource_path=coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=122218\n",
+       "\tannotations_count=1018861\n",
+       "subsets\n",
+       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n",
+    "\n",
+    "print('Representation for sample COCO dataset')\n",
+    "dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "id": "d38cfc9b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Subset candidates: ['val2017', 'train2017']\n"
+     ]
+    }
+   ],
+   "source": [
+    "subsets = list(dataset.subsets().keys())\n",
+    "print(\"Subset candidates:\", subsets)\n",
+    "\n",
+    "def get_ids(dataset: dm.Dataset, subset: str):\n",
+    "    ids = []\n",
+    "    for item in dataset:\n",
+    "        if item.subset == subset:\n",
+    "            ids += [item.id]\n",
+    "    \n",
+    "    return ids\n",
+    "\n",
+    "ids = get_ids(dataset, subsets[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db0e0346",
+   "metadata": {},
+   "source": [
+    "In VOC dataset, there are 'train' and 'test' subset. We will filter only 'train' subset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "51bf3388",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dataset.transform(\"masks_to_polygons\")\n",
+    "reindexing_dataset = dataset.transform(\"reindex\", start=0)\n",
+    "\n",
+    "ids = get_ids(reindexing_dataset, subsets[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "fb608396",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset\n",
+      "\tsize=123287\n",
+      "\tsource_path=coco_dataset\n",
+      "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+      "\tannotated_items_count=122218\n",
+      "\tannotations_count=1018861\n",
+      "subsets\n",
+      "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
+      "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+      "categories\n",
+      "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "rollback_dataset = dataset.transform(\"id_from_image_name\")\n",
+    "\n",
+    "ids = get_ids(rollback_dataset, subsets[0])\n",
+    "print(dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "a2515d03",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "__init__() got an unexpected keyword argument 'regex'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/core/formatters.py:706\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m    699\u001b[0m stream \u001b[39m=\u001b[39m StringIO()\n\u001b[1;32m    700\u001b[0m printer \u001b[39m=\u001b[39m pretty\u001b[39m.\u001b[39mRepresentationPrinter(stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose,\n\u001b[1;32m    701\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_width, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnewline,\n\u001b[1;32m    702\u001b[0m     max_seq_length\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_seq_length,\n\u001b[1;32m    703\u001b[0m     singleton_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msingleton_printers,\n\u001b[1;32m    704\u001b[0m     type_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtype_printers,\n\u001b[1;32m    705\u001b[0m     deferred_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 706\u001b[0m printer\u001b[39m.\u001b[39;49mpretty(obj)\n\u001b[1;32m    707\u001b[0m printer\u001b[39m.\u001b[39mflush()\n\u001b[1;32m    708\u001b[0m \u001b[39mreturn\u001b[39;00m stream\u001b[39m.\u001b[39mgetvalue()\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m    407\u001b[0m                         \u001b[39mreturn\u001b[39;00m meth(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m    408\u001b[0m                 \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mobject\u001b[39m \\\n\u001b[1;32m    409\u001b[0m                         \u001b[39mand\u001b[39;00m callable(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39m__repr__\u001b[39m\u001b[39m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m                     \u001b[39mreturn\u001b[39;00m _repr_pprint(obj, \u001b[39mself\u001b[39;49m, cycle)\n\u001b[1;32m    412\u001b[0m     \u001b[39mreturn\u001b[39;00m _default_pprint(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m    413\u001b[0m \u001b[39mfinally\u001b[39;00m:\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m    776\u001b[0m \u001b[39m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m    777\u001b[0m \u001b[39m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mrepr\u001b[39;49m(obj)\n\u001b[1;32m    779\u001b[0m lines \u001b[39m=\u001b[39m output\u001b[39m.\u001b[39msplitlines()\n\u001b[1;32m    780\u001b[0m \u001b[39mwith\u001b[39;00m p\u001b[39m.\u001b[39mgroup():\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m    843\u001b[0m     separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    844\u001b[0m     \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    846\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    847\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    848\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    849\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    850\u001b[0m         
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    851\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    852\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    853\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    854\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    855\u001b[0m     )\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m    569\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m    571\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m    363\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m         \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m    365\u001b[0m             \u001b[39mpass\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m    368\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m    369\u001b[0m         \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m    370\u001b[0m         \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m         \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m    372\u001b[0m             \u001b[39myield\u001b[39;00m item\n\u001b[1;32m    373\u001b[0m     \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m     transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m    452\u001b[0m     \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m    453\u001b[0m         \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m    454\u001b[0m         \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m    455\u001b[0m         \u001b[39mpass\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.<locals>._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m    400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m     source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m    402\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m    404\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n",
+      "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'"
+     ]
+    }
+   ],
+   "source": [
+    "mapping = {\"motorcycle\": \"bicycle\", \"bus\": \"car\", \"truck\": \"car\"}\n",
+    "remap_label_dataset = dataset.transform(\"remap_labels\", mapping=mapping)\n",
+    "remap_label_dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "id": "d0bcd69e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset\n",
+      "\tsize=123287\n",
+      "\tsource_path=coco_dataset\n",
+      "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+      "\tannotated_items_count=122218\n",
+      "\tannotations_count=1018861\n",
+      "subsets\n",
+      "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
+      "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+      "categories\n",
+      "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "96a8e001",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "__init__() got an unexpected keyword argument 'regex'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn [45], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m strr \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m|\u001b[39m\u001b[39m\\1\u001b[39;00m\u001b[39m|^image_|\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m      2\u001b[0m renamed_dataset \u001b[39m=\u001b[39m dataset\u001b[39m.\u001b[39mtransform(\u001b[39m\"\u001b[39m\u001b[39mrename\u001b[39m\u001b[39m\"\u001b[39m, regex\u001b[39m=\u001b[39mstrr)\n\u001b[0;32m----> 3\u001b[0m \u001b[39mprint\u001b[39;49m(renamed_dataset)\n\u001b[1;32m      5\u001b[0m \u001b[39m# ids = get_ids(dataset, subsets[0])\u001b[39;00m\n\u001b[1;32m      6\u001b[0m \u001b[39m# print('val2017', ids)\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m    843\u001b[0m     separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    844\u001b[0m     \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    846\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    847\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    848\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    849\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    850\u001b[0m         
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    851\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    852\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    853\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    854\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    855\u001b[0m     )\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m    569\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m    571\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m    363\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m         \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m    365\u001b[0m             \u001b[39mpass\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m    368\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m    369\u001b[0m         \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m    370\u001b[0m         \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m         \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m    372\u001b[0m             \u001b[39myield\u001b[39;00m item\n\u001b[1;32m    373\u001b[0m     \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m     transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m    452\u001b[0m     \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m    453\u001b[0m         \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m    454\u001b[0m         \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m    455\u001b[0m         \u001b[39mpass\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.<locals>._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m    400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m     source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m    402\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m    404\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n",
+      "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'"
+     ]
+    }
+   ],
+   "source": [
+    "# strr = '|\\1|^image_|'\n",
+    "# renamed_dataset = dataset.transform(\"rename\", regex=strr)\n",
+    "# print(renamed_dataset)\n",
+    "\n",
+    "# ids = get_ids(dataset, subsets[0])\n",
+    "# print('val2017', ids)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4ed4a847",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datumaro.components.visualizer import Visualizer\n",
+    "\n",
+    "visualizer = Visualizer(dataset, figsize=(8, 8), alpha=0.7)\n",
+    "fig = visualizer.vis_gallery(ids[:4], subsets[0], (2, 2))\n",
+    "fig.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.8.13 ('datum')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.13"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "affff79ca1edacbf0919cffebb4fdcbe1cd4dfe1034cbc10ce20b177737f1c41"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 711b41b035cac6c61e397cce0c0b9a64a4da4915 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Fri, 11 Nov 2022 00:25:37 +0900
Subject: [PATCH 2/8] add notebook example for transform api

---
 notebooks/05_transform.ipynb | 3378 ++++++++++++++++++++++++++++++++--
 1 file changed, 3250 insertions(+), 128 deletions(-)

diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
index 8780c8203d..a0d465d8ab 100644
--- a/notebooks/05_transform.ipynb
+++ b/notebooks/05_transform.ipynb
@@ -13,54 +13,36 @@
    "id": "ca821a19",
    "metadata": {},
    "source": [
-    "In this notebook example, we'll take a look at Datumaro transform api, where transform provides the task changes by modifying the annotation style, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "da198c67",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Copyright (C) 2022 Intel Corporation\n",
-    "#\n",
-    "# SPDX-License-Identifier: MIT\n",
-    "\n",
-    "import os\n",
-    "import datumaro as dm"
+    "In this notebook example, we will take a look at the Datumaro transform API, which supports splitting and merging subsets, redefining annotation information, re-identifying media, and changing the task by converting the annotation format, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc."
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "9e2cf885",
+   "id": "bd9e52cf",
    "metadata": {},
    "source": [
-    "### Filtered by subset"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "031f1d62",
-   "metadata": {},
-   "source": [
-    "We export sample VOC dataset to filter only train subset."
+    "## Prerequisite\n",
+    "### Download COCO 2017 validation dataset\n",
+    "\n",
+    "Please refer to https://github.com/openvinotoolkit/datumaro/blob/develop/notebooks/03_visualize.ipynb to prepare the COCO 2017 validation dataset."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
-   "id": "b9640838",
+   "execution_count": 14,
+   "id": "da198c67",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
      ]
     },
     {
@@ -75,7 +57,7 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=123287\n",
-       "\tsource_path=coco_dataset\n",
+       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=122218\n",
        "\tannotations_count=1018861\n",
@@ -86,21 +68,38 @@
        "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "# Copyright (C) 2022 Intel Corporation\n",
+    "#\n",
+    "# SPDX-License-Identifier: MIT\n",
+    "\n",
+    "import os\n",
+    "import datumaro as dm\n",
+    "\n",
     "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n",
     "\n",
     "print('Representation for sample COCO dataset')\n",
     "dataset"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "50b11dc3",
+   "metadata": {},
+   "source": [
+    "### Transform media ID\n",
+    "\n",
+    "We first modify the `media_id` of each item through a transformation. The original `media_id` values are shown below."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 15,
    "id": "d38cfc9b",
    "metadata": {
     "scrolled": true
@@ -112,6 +111,1016 @@
      "text": [
       "Subset candidates: ['val2017', 'train2017']\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "['000000397133',\n",
+       " '000000037777',\n",
+       " '000000252219',\n",
+       " '000000087038',\n",
+       " '000000174482',\n",
+       " '000000403385',\n",
+       " '000000006818',\n",
+       " '000000480985',\n",
+       " '000000458054',\n",
+       " '000000331352',\n",
+       " '000000296649',\n",
+       " '000000386912',\n",
+       " '000000502136',\n",
+       " '000000491497',\n",
+       " '000000184791',\n",
+       " '000000348881',\n",
+       " '000000289393',\n",
+       " '000000522713',\n",
+       " '000000181666',\n",
+       " '000000017627',\n",
+       " '000000143931',\n",
+       " '000000303818',\n",
+       " '000000463730',\n",
+       " '000000460347',\n",
+       " '000000322864',\n",
+       " '000000226111',\n",
+       " '000000153299',\n",
+       " '000000308394',\n",
+       " '000000456496',\n",
+       " '000000058636',\n",
+       " '000000041888',\n",
+       " '000000184321',\n",
+       " '000000565778',\n",
+       " '000000297343',\n",
+       " '000000336587',\n",
+       " '000000122745',\n",
+       " '000000219578',\n",
+       " '000000555705',\n",
+       " '000000443303',\n",
+       " '000000500663',\n",
+       " '000000418281',\n",
+       " '000000025560',\n",
+       " '000000403817',\n",
+       " '000000085329',\n",
+       " '000000329323',\n",
+       " '000000239274',\n",
+       " '000000286994',\n",
+       " '000000511321',\n",
+       " '000000314294',\n",
+       " '000000233771',\n",
+       " '000000475779',\n",
+       " '000000301867',\n",
+       " '000000312421',\n",
+       " '000000185250',\n",
+       " '000000356427',\n",
+       " '000000572517',\n",
+       " '000000270244',\n",
+       " '000000516316',\n",
+       " '000000125211',\n",
+       " '000000562121',\n",
+       " '000000360661',\n",
+       " '000000016228',\n",
+       " '000000382088',\n",
+       " '000000266409',\n",
+       " '000000430961',\n",
+       " '000000080671',\n",
+       " '000000577539',\n",
+       " '000000104612',\n",
+       " '000000476258',\n",
+       " '000000448365',\n",
+       " '000000035197',\n",
+       " '000000349860',\n",
+       " '000000180135',\n",
+       " '000000486438',\n",
+       " '000000400573',\n",
+       " '000000109798',\n",
+       " '000000370677',\n",
+       " '000000238866',\n",
+       " '000000369370',\n",
+       " '000000502737',\n",
+       " '000000515579',\n",
+       " '000000515445',\n",
+       " '000000173383',\n",
+       " '000000438862',\n",
+       " '000000180560',\n",
+       " '000000347693',\n",
+       " '000000039956',\n",
+       " '000000321214',\n",
+       " '000000474028',\n",
+       " '000000066523',\n",
+       " '000000355257',\n",
+       " '000000142092',\n",
+       " '000000063154',\n",
+       " '000000199551',\n",
+       " '000000239347',\n",
+       " '000000514508',\n",
+       " '000000473237',\n",
+       " '000000228144',\n",
+       " '000000206027',\n",
+       " '000000078915',\n",
+       " '000000551215',\n",
+       " '000000544519',\n",
+       " '000000096493',\n",
+       " '000000023899',\n",
+       " '000000340175',\n",
+       " '000000578500',\n",
+       " '000000366141',\n",
+       " '000000057597',\n",
+       " '000000559842',\n",
+       " '000000434230',\n",
+       " '000000428454',\n",
+       " '000000399462',\n",
+       " '000000261061',\n",
+       " '000000168330',\n",
+       " '000000383384',\n",
+       " '000000342006',\n",
+       " '000000217285',\n",
+       " '000000236412',\n",
+       " '000000524456',\n",
+       " '000000153343',\n",
+       " '000000095786',\n",
+       " '000000326541',\n",
+       " '000000213086',\n",
+       " '000000231339',\n",
+       " '000000508730',\n",
+       " '000000550426',\n",
+       " '000000368294',\n",
+       " '000000171190',\n",
+       " '000000301135',\n",
+       " '000000580294',\n",
+       " '000000494869',\n",
+       " '000000033638',\n",
+       " '000000329219',\n",
+       " '000000034873',\n",
+       " '000000186980',\n",
+       " '000000127182',\n",
+       " '000000356387',\n",
+       " '000000367680',\n",
+       " '000000263796',\n",
+       " '000000117425',\n",
+       " '000000365387',\n",
+       " '000000487583',\n",
+       " '000000504711',\n",
+       " '000000363840',\n",
+       " '000000214720',\n",
+       " '000000379453',\n",
+       " '000000311295',\n",
+       " '000000029393',\n",
+       " '000000278848',\n",
+       " '000000166391',\n",
+       " '000000048153',\n",
+       " '000000459153',\n",
+       " '000000295713',\n",
+       " '000000223130',\n",
+       " '000000273132',\n",
+       " '000000198960',\n",
+       " '000000344059',\n",
+       " '000000410428',\n",
+       " '000000087875',\n",
+       " '000000450758',\n",
+       " '000000458790',\n",
+       " '000000460160',\n",
+       " '000000458109',\n",
+       " '000000030675',\n",
+       " '000000566524',\n",
+       " '000000338428',\n",
+       " '000000545826',\n",
+       " '000000166277',\n",
+       " '000000269314',\n",
+       " '000000476415',\n",
+       " '000000292082',\n",
+       " '000000360137',\n",
+       " '000000122046',\n",
+       " '000000352684',\n",
+       " '000000512836',\n",
+       " '000000008021',\n",
+       " '000000107226',\n",
+       " '000000084477',\n",
+       " '000000562243',\n",
+       " '000000181859',\n",
+       " '000000177015',\n",
+       " '000000292236',\n",
+       " '000000121506',\n",
+       " '000000288042',\n",
+       " '000000453860',\n",
+       " '000000500257',\n",
+       " '000000113403',\n",
+       " '000000125062',\n",
+       " '000000375015',\n",
+       " '000000334719',\n",
+       " '000000134112',\n",
+       " '000000283520',\n",
+       " '000000031269',\n",
+       " '000000319721',\n",
+       " '000000165351',\n",
+       " '000000347265',\n",
+       " '000000414170',\n",
+       " '000000231508',\n",
+       " '000000389381',\n",
+       " '000000118921',\n",
+       " '000000021503',\n",
+       " '000000000785',\n",
+       " '000000300842',\n",
+       " '000000105014',\n",
+       " '000000261982',\n",
+       " '000000034205',\n",
+       " '000000099242',\n",
+       " '000000314709',\n",
+       " '000000460494',\n",
+       " '000000339442',\n",
+       " '000000541055',\n",
+       " '000000409475',\n",
+       " '000000464786',\n",
+       " '000000378605',\n",
+       " '000000331817',\n",
+       " '000000218091',\n",
+       " '000000578545',\n",
+       " '000000363207',\n",
+       " '000000372577',\n",
+       " '000000212166',\n",
+       " '000000172571',\n",
+       " '000000294831',\n",
+       " '000000084431',\n",
+       " '000000323355',\n",
+       " '000000355325',\n",
+       " '000000100582',\n",
+       " '000000555412',\n",
+       " '000000004495',\n",
+       " '000000009483',\n",
+       " '000000326082',\n",
+       " '000000398237',\n",
+       " '000000507223',\n",
+       " '000000031050',\n",
+       " '000000239537',\n",
+       " '000000340930',\n",
+       " '000000011813',\n",
+       " '000000281414',\n",
+       " '000000537991',\n",
+       " '000000284282',\n",
+       " '000000321333',\n",
+       " '000000521282',\n",
+       " '000000108026',\n",
+       " '000000243204',\n",
+       " '000000177935',\n",
+       " '000000038829',\n",
+       " '000000397327',\n",
+       " '000000501523',\n",
+       " '000000555050',\n",
+       " '000000376442',\n",
+       " '000000187243',\n",
+       " '000000356347',\n",
+       " '000000293044',\n",
+       " '000000560279',\n",
+       " '000000042276',\n",
+       " '000000534827',\n",
+       " '000000190756',\n",
+       " '000000482917',\n",
+       " '000000300659',\n",
+       " '000000199977',\n",
+       " '000000442480',\n",
+       " '000000384350',\n",
+       " '000000383621',\n",
+       " '000000189828',\n",
+       " '000000412894',\n",
+       " '000000537153',\n",
+       " '000000361103',\n",
+       " '000000392722',\n",
+       " '000000338560',\n",
+       " '000000264535',\n",
+       " '000000295231',\n",
+       " '000000154947',\n",
+       " '000000212559',\n",
+       " '000000458755',\n",
+       " '000000104782',\n",
+       " '000000315257',\n",
+       " '000000130599',\n",
+       " '000000227187',\n",
+       " '000000151662',\n",
+       " '000000461275',\n",
+       " '000000523811',\n",
+       " '000000456559',\n",
+       " '000000101068',\n",
+       " '000000140640',\n",
+       " '000000516708',\n",
+       " '000000544605',\n",
+       " '000000385190',\n",
+       " '000000338986',\n",
+       " '000000053994',\n",
+       " '000000061171',\n",
+       " '000000314034',\n",
+       " '000000291490',\n",
+       " '000000152740',\n",
+       " '000000024919',\n",
+       " '000000079837',\n",
+       " '000000021903',\n",
+       " '000000564133',\n",
+       " '000000337055',\n",
+       " '000000110638',\n",
+       " '000000034139',\n",
+       " '000000080340',\n",
+       " '000000083113',\n",
+       " '000000173033',\n",
+       " '000000255664',\n",
+       " '000000072813',\n",
+       " '000000545129',\n",
+       " '000000546011',\n",
+       " '000000121031',\n",
+       " '000000172547',\n",
+       " '000000369081',\n",
+       " '000000509131',\n",
+       " '000000578922',\n",
+       " '000000464089',\n",
+       " '000000453708',\n",
+       " '000000177714',\n",
+       " '000000459887',\n",
+       " '000000155179',\n",
+       " '000000261116',\n",
+       " '000000396274',\n",
+       " '000000029640',\n",
+       " '000000141328',\n",
+       " '000000308430',\n",
+       " '000000043314',\n",
+       " '000000273715',\n",
+       " '000000456303',\n",
+       " '000000406611',\n",
+       " '000000475064',\n",
+       " '000000466567',\n",
+       " '000000137246',\n",
+       " '000000015079',\n",
+       " '000000296284',\n",
+       " '000000226147',\n",
+       " '000000226903',\n",
+       " '000000127517',\n",
+       " '000000162092',\n",
+       " '000000131379',\n",
+       " '000000366611',\n",
+       " '000000263969',\n",
+       " '000000551439',\n",
+       " '000000474167',\n",
+       " '000000159458',\n",
+       " '000000554735',\n",
+       " '000000099428',\n",
+       " '000000386352',\n",
+       " '000000173004',\n",
+       " '000000311394',\n",
+       " '000000578489',\n",
+       " '000000189310',\n",
+       " '000000491366',\n",
+       " '000000448076',\n",
+       " '000000293804',\n",
+       " '000000312237',\n",
+       " '000000221291',\n",
+       " '000000141821',\n",
+       " '000000410650',\n",
+       " '000000199310',\n",
+       " '000000323151',\n",
+       " '000000089648',\n",
+       " '000000219283',\n",
+       " '000000471869',\n",
+       " '000000520264',\n",
+       " '000000111179',\n",
+       " '000000151000',\n",
+       " '000000100624',\n",
+       " '000000332570',\n",
+       " '000000057238',\n",
+       " '000000502732',\n",
+       " '000000135561',\n",
+       " '000000008277',\n",
+       " '000000173044',\n",
+       " '000000168458',\n",
+       " '000000512194',\n",
+       " '000000370042',\n",
+       " '000000189436',\n",
+       " '000000533958',\n",
+       " '000000117645',\n",
+       " '000000221708',\n",
+       " '000000202228',\n",
+       " '000000403565',\n",
+       " '000000211042',\n",
+       " '000000492878',\n",
+       " '000000441586',\n",
+       " '000000547816',\n",
+       " '000000306733',\n",
+       " '000000530099',\n",
+       " '000000312278',\n",
+       " '000000097679',\n",
+       " '000000564127',\n",
+       " '000000251065',\n",
+       " '000000003845',\n",
+       " '000000138819',\n",
+       " '000000205834',\n",
+       " '000000348708',\n",
+       " '000000166521',\n",
+       " '000000485802',\n",
+       " '000000099054',\n",
+       " '000000022969',\n",
+       " '000000570539',\n",
+       " '000000278353',\n",
+       " '000000158548',\n",
+       " '000000461405',\n",
+       " '000000176606',\n",
+       " '000000044699',\n",
+       " '000000559956',\n",
+       " '000000268996',\n",
+       " '000000011197',\n",
+       " '000000483667',\n",
+       " '000000448810',\n",
+       " '000000000724',\n",
+       " '000000051961',\n",
+       " '000000375278',\n",
+       " '000000302165',\n",
+       " '000000131131',\n",
+       " '000000098839',\n",
+       " '000000402992',\n",
+       " '000000465675',\n",
+       " '000000240754',\n",
+       " '000000021167',\n",
+       " '000000148730',\n",
+       " '000000384468',\n",
+       " '000000253742',\n",
+       " '000000186873',\n",
+       " '000000082180',\n",
+       " '000000446522',\n",
+       " '000000552902',\n",
+       " '000000125405',\n",
+       " '000000110211',\n",
+       " '000000016010',\n",
+       " '000000064462',\n",
+       " '000000314182',\n",
+       " '000000248980',\n",
+       " '000000068387',\n",
+       " '000000429281',\n",
+       " '000000345466',\n",
+       " '000000352900',\n",
+       " '000000118367',\n",
+       " '000000113235',\n",
+       " '000000311303',\n",
+       " '000000163640',\n",
+       " '000000370999',\n",
+       " '000000001490',\n",
+       " '000000329456',\n",
+       " '000000570471',\n",
+       " '000000088269',\n",
+       " '000000260470',\n",
+       " '000000193494',\n",
+       " '000000252776',\n",
+       " '000000201072',\n",
+       " '000000018150',\n",
+       " '000000337498',\n",
+       " '000000521405',\n",
+       " '000000518770',\n",
+       " '000000201646',\n",
+       " '000000036936',\n",
+       " '000000059044',\n",
+       " '000000172946',\n",
+       " '000000234607',\n",
+       " '000000532690',\n",
+       " '000000323895',\n",
+       " '000000384670',\n",
+       " '000000050326',\n",
+       " '000000205542',\n",
+       " '000000217957',\n",
+       " '000000162035',\n",
+       " '000000415727',\n",
+       " '000000046252',\n",
+       " '000000182021',\n",
+       " '000000231747',\n",
+       " '000000090284',\n",
+       " '000000286553',\n",
+       " '000000488736',\n",
+       " '000000063602',\n",
+       " '000000383386',\n",
+       " '000000450686',\n",
+       " '000000005060',\n",
+       " '000000286523',\n",
+       " '000000120420',\n",
+       " '000000579655',\n",
+       " '000000117908',\n",
+       " '000000550322',\n",
+       " '000000322844',\n",
+       " '000000218362',\n",
+       " '000000213224',\n",
+       " '000000223747',\n",
+       " '000000297578',\n",
+       " '000000458992',\n",
+       " '000000078266',\n",
+       " '000000164602',\n",
+       " '000000440475',\n",
+       " '000000101762',\n",
+       " '000000557501',\n",
+       " '000000203317',\n",
+       " '000000368940',\n",
+       " '000000569917',\n",
+       " '000000144798',\n",
+       " '000000284623',\n",
+       " '000000520301',\n",
+       " '000000127987',\n",
+       " '000000063740',\n",
+       " '000000036494',\n",
+       " '000000210032',\n",
+       " '000000488270',\n",
+       " '000000067180',\n",
+       " '000000281179',\n",
+       " '000000064359',\n",
+       " '000000126226',\n",
+       " '000000190923',\n",
+       " '000000150265',\n",
+       " '000000216739',\n",
+       " '000000038048',\n",
+       " '000000354829',\n",
+       " '000000525155',\n",
+       " '000000163314',\n",
+       " '000000259571',\n",
+       " '000000561679',\n",
+       " '000000236166',\n",
+       " '000000153529',\n",
+       " '000000473015',\n",
+       " '000000379800',\n",
+       " '000000253835',\n",
+       " '000000034071',\n",
+       " '000000036861',\n",
+       " '000000569565',\n",
+       " '000000219271',\n",
+       " '000000205647',\n",
+       " '000000460841',\n",
+       " '000000123131',\n",
+       " '000000334006',\n",
+       " '000000511599',\n",
+       " '000000229858',\n",
+       " '000000174004',\n",
+       " '000000519764',\n",
+       " '000000137576',\n",
+       " '000000087470',\n",
+       " '000000009769',\n",
+       " '000000558114',\n",
+       " '000000205776',\n",
+       " '000000163257',\n",
+       " '000000475678',\n",
+       " '000000085478',\n",
+       " '000000318080',\n",
+       " '000000361551',\n",
+       " '000000236784',\n",
+       " '000000092839',\n",
+       " '000000042296',\n",
+       " '000000560266',\n",
+       " '000000486479',\n",
+       " '000000127955',\n",
+       " '000000307658',\n",
+       " '000000417465',\n",
+       " '000000342971',\n",
+       " '000000011760',\n",
+       " '000000069106',\n",
+       " '000000070158',\n",
+       " '000000176634',\n",
+       " '000000281447',\n",
+       " '000000552371',\n",
+       " '000000361919',\n",
+       " '000000560256',\n",
+       " '000000138115',\n",
+       " '000000114871',\n",
+       " '000000374369',\n",
+       " '000000123213',\n",
+       " '000000123321',\n",
+       " '000000015278',\n",
+       " '000000357742',\n",
+       " '000000439854',\n",
+       " '000000465836',\n",
+       " '000000414385',\n",
+       " '000000131556',\n",
+       " '000000322724',\n",
+       " '000000320664',\n",
+       " '000000481390',\n",
+       " '000000109916',\n",
+       " '000000276434',\n",
+       " '000000579635',\n",
+       " '000000295316',\n",
+       " '000000571313',\n",
+       " '000000183127',\n",
+       " '000000115898',\n",
+       " '000000146358',\n",
+       " '000000329542',\n",
+       " '000000189752',\n",
+       " '000000290163',\n",
+       " '000000091406',\n",
+       " '000000322352',\n",
+       " '000000223959',\n",
+       " '000000326248',\n",
+       " '000000218439',\n",
+       " '000000453722',\n",
+       " '000000293625',\n",
+       " '000000411817',\n",
+       " '000000546964',\n",
+       " '000000215259',\n",
+       " '000000573094',\n",
+       " '000000560011',\n",
+       " '000000038576',\n",
+       " '000000147729',\n",
+       " '000000579307',\n",
+       " '000000154425',\n",
+       " '000000432898',\n",
+       " '000000404923',\n",
+       " '000000130586',\n",
+       " '000000163057',\n",
+       " '000000007511',\n",
+       " '000000067406',\n",
+       " '000000290179',\n",
+       " '000000248752',\n",
+       " '000000054593',\n",
+       " '000000116208',\n",
+       " '000000340697',\n",
+       " '000000450303',\n",
+       " '000000494427',\n",
+       " '000000137294',\n",
+       " '000000410880',\n",
+       " '000000311180',\n",
+       " '000000091654',\n",
+       " '000000181796',\n",
+       " '000000002431',\n",
+       " '000000349184',\n",
+       " '000000298396',\n",
+       " '000000472046',\n",
+       " '000000074058',\n",
+       " '000000058029',\n",
+       " '000000134096',\n",
+       " '000000111951',\n",
+       " '000000103585',\n",
+       " '000000210273',\n",
+       " '000000352584',\n",
+       " '000000446651',\n",
+       " '000000194875',\n",
+       " '000000052017',\n",
+       " '000000336309',\n",
+       " '000000227478',\n",
+       " '000000339870',\n",
+       " '000000080666',\n",
+       " '000000033707',\n",
+       " '000000327601',\n",
+       " '000000255749',\n",
+       " '000000008762',\n",
+       " '000000526392',\n",
+       " '000000535578',\n",
+       " '000000580757',\n",
+       " '000000165039',\n",
+       " '000000148719',\n",
+       " '000000108440',\n",
+       " '000000489842',\n",
+       " '000000579818',\n",
+       " '000000423229',\n",
+       " '000000323828',\n",
+       " '000000166287',\n",
+       " '000000101420',\n",
+       " '000000334555',\n",
+       " '000000196759',\n",
+       " '000000411665',\n",
+       " '000000061418',\n",
+       " '000000526751',\n",
+       " '000000024021',\n",
+       " '000000277020',\n",
+       " '000000047828',\n",
+       " '000000183716',\n",
+       " '000000271997',\n",
+       " '000000008532',\n",
+       " '000000094336',\n",
+       " '000000390555',\n",
+       " '000000250282',\n",
+       " '000000068409',\n",
+       " '000000002299',\n",
+       " '000000011051',\n",
+       " '000000066038',\n",
+       " '000000360960',\n",
+       " '000000360097',\n",
+       " '000000421455',\n",
+       " '000000504589',\n",
+       " '000000464522',\n",
+       " '000000454750',\n",
+       " '000000509735',\n",
+       " '000000023034',\n",
+       " '000000141671',\n",
+       " '000000506656',\n",
+       " '000000272566',\n",
+       " '000000045728',\n",
+       " '000000424551',\n",
+       " '000000341719',\n",
+       " '000000072795',\n",
+       " '000000078959',\n",
+       " '000000417285',\n",
+       " '000000002157',\n",
+       " '000000043816',\n",
+       " '000000455555',\n",
+       " '000000535306',\n",
+       " '000000030504',\n",
+       " '000000093353',\n",
+       " '000000530052',\n",
+       " '000000473118',\n",
+       " '000000091779',\n",
+       " '000000283113',\n",
+       " '000000226130',\n",
+       " '000000097278',\n",
+       " '000000567640',\n",
+       " '000000532493',\n",
+       " '000000045550',\n",
+       " '000000156643',\n",
+       " '000000430056',\n",
+       " '000000410456',\n",
+       " '000000441286',\n",
+       " '000000279541',\n",
+       " '000000000885',\n",
+       " '000000378284',\n",
+       " '000000156076',\n",
+       " '000000143572',\n",
+       " '000000229849',\n",
+       " '000000039551',\n",
+       " '000000056344',\n",
+       " '000000193348',\n",
+       " '000000016958',\n",
+       " '000000572678',\n",
+       " '000000106235',\n",
+       " '000000341681',\n",
+       " '000000083172',\n",
+       " '000000343524',\n",
+       " '000000395801',\n",
+       " '000000388056',\n",
+       " '000000259690',\n",
+       " '000000235836',\n",
+       " '000000343218',\n",
+       " '000000205105',\n",
+       " '000000513283',\n",
+       " '000000176446',\n",
+       " '000000371677',\n",
+       " '000000308531',\n",
+       " '000000497599',\n",
+       " '000000455352',\n",
+       " '000000236914',\n",
+       " '000000232684',\n",
+       " '000000415238',\n",
+       " '000000290843',\n",
+       " '000000519522',\n",
+       " '000000144784',\n",
+       " '000000167486',\n",
+       " '000000392228',\n",
+       " '000000488673',\n",
+       " '000000191013',\n",
+       " '000000080057',\n",
+       " '000000570169',\n",
+       " '000000224807',\n",
+       " '000000163562',\n",
+       " '000000136355',\n",
+       " '000000492362',\n",
+       " '000000102707',\n",
+       " '000000232563',\n",
+       " '000000010977',\n",
+       " '000000051598',\n",
+       " '000000032285',\n",
+       " '000000520910',\n",
+       " '000000131273',\n",
+       " '000000206411',\n",
+       " '000000472375',\n",
+       " '000000481404',\n",
+       " '000000471991',\n",
+       " '000000017436',\n",
+       " '000000177934',\n",
+       " '000000165518',\n",
+       " '000000571718',\n",
+       " '000000459467',\n",
+       " '000000135673',\n",
+       " '000000134886',\n",
+       " '000000485895',\n",
+       " '000000287545',\n",
+       " '000000577182',\n",
+       " '000000289222',\n",
+       " '000000372819',\n",
+       " '000000310072',\n",
+       " '000000087144',\n",
+       " '000000430875',\n",
+       " '000000060347',\n",
+       " '000000042070',\n",
+       " '000000420916',\n",
+       " '000000453584',\n",
+       " '000000296224',\n",
+       " '000000122606',\n",
+       " '000000311909',\n",
+       " '000000579893',\n",
+       " '000000284296',\n",
+       " '000000221017',\n",
+       " '000000315001',\n",
+       " '000000439715',\n",
+       " '000000284991',\n",
+       " '000000389566',\n",
+       " '000000078843',\n",
+       " '000000122927',\n",
+       " '000000225532',\n",
+       " '000000013659',\n",
+       " '000000153568',\n",
+       " '000000395633',\n",
+       " '000000419096',\n",
+       " '000000203488',\n",
+       " '000000361268',\n",
+       " '000000466125',\n",
+       " '000000414795',\n",
+       " '000000508101',\n",
+       " '000000253386',\n",
+       " '000000222991',\n",
+       " '000000530854',\n",
+       " '000000351810',\n",
+       " '000000338624',\n",
+       " '000000138492',\n",
+       " '000000263463',\n",
+       " '000000226592',\n",
+       " '000000378454',\n",
+       " '000000020059',\n",
+       " '000000227686',\n",
+       " '000000476215',\n",
+       " '000000297698',\n",
+       " '000000247917',\n",
+       " '000000439522',\n",
+       " '000000479448',\n",
+       " '000000424721',\n",
+       " '000000026690',\n",
+       " '000000558854',\n",
+       " '000000176901',\n",
+       " '000000334767',\n",
+       " '000000301563',\n",
+       " '000000086755',\n",
+       " '000000194471',\n",
+       " '000000420281',\n",
+       " '000000533206',\n",
+       " '000000099810',\n",
+       " '000000334483',\n",
+       " '000000089670',\n",
+       " '000000482275',\n",
+       " '000000404805',\n",
+       " '000000002261',\n",
+       " '000000425702',\n",
+       " '000000036844',\n",
+       " '000000012576',\n",
+       " '000000361238',\n",
+       " '000000108253',\n",
+       " '000000319935',\n",
+       " '000000003934',\n",
+       " '000000029596',\n",
+       " '000000047740',\n",
+       " '000000077460',\n",
+       " '000000014439',\n",
+       " '000000571893',\n",
+       " '000000447314',\n",
+       " '000000181303',\n",
+       " '000000058350',\n",
+       " '000000026465',\n",
+       " '000000246968',\n",
+       " '000000536947',\n",
+       " '000000076731',\n",
+       " '000000286182',\n",
+       " '000000433980',\n",
+       " '000000561366',\n",
+       " '000000380913',\n",
+       " '000000032887',\n",
+       " '000000517687',\n",
+       " '000000213035',\n",
+       " '000000399205',\n",
+       " '000000349837',\n",
+       " '000000350002',\n",
+       " '000000131431',\n",
+       " '000000356248',\n",
+       " '000000334399',\n",
+       " '000000057150',\n",
+       " '000000363666',\n",
+       " '000000507235',\n",
+       " '000000169996',\n",
+       " '000000226417',\n",
+       " '000000481573',\n",
+       " '000000056127',\n",
+       " '000000123480',\n",
+       " '000000274687',\n",
+       " '000000164637',\n",
+       " '000000178028',\n",
+       " '000000493286',\n",
+       " '000000348216',\n",
+       " '000000345027',\n",
+       " '000000571804',\n",
+       " '000000140658',\n",
+       " '000000102644',\n",
+       " '000000581615',\n",
+       " '000000279887',\n",
+       " '000000230008',\n",
+       " '000000284698',\n",
+       " '000000102356',\n",
+       " '000000456394',\n",
+       " '000000323709',\n",
+       " '000000452122',\n",
+       " '000000579158',\n",
+       " '000000525322',\n",
+       " '000000033114',\n",
+       " '000000008690',\n",
+       " '000000381639',\n",
+       " '000000217614',\n",
+       " '000000284445',\n",
+       " '000000468124',\n",
+       " '000000187144',\n",
+       " '000000273198',\n",
+       " '000000095843',\n",
+       " '000000417779',\n",
+       " '000000447342',\n",
+       " '000000166563',\n",
+       " '000000490125',\n",
+       " '000000561009',\n",
+       " '000000183675',\n",
+       " '000000290248',\n",
+       " '000000532058',\n",
+       " '000000214200',\n",
+       " '000000578093',\n",
+       " '000000369751',\n",
+       " '000000429011',\n",
+       " '000000301061',\n",
+       " '000000105264',\n",
+       " '000000267434',\n",
+       " '000000370711',\n",
+       " '000000025393',\n",
+       " '000000471087',\n",
+       " '000000106757',\n",
+       " '000000183648',\n",
+       " '000000358525',\n",
+       " '000000049269',\n",
+       " '000000079144',\n",
+       " '000000519688',\n",
+       " '000000431727',\n",
+       " '000000130699',\n",
+       " '000000215245',\n",
+       " '000000091921',\n",
+       " '000000218424',\n",
+       " '000000473974',\n",
+       " '000000405249',\n",
+       " '000000235784',\n",
+       " '000000521540',\n",
+       " '000000537506',\n",
+       " '000000119445',\n",
+       " '000000507015',\n",
+       " '000000173830',\n",
+       " '000000356498',\n",
+       " '000000435081',\n",
+       " '000000018575',\n",
+       " '000000373315',\n",
+       " '000000227765',\n",
+       " '000000013546',\n",
+       " '000000067310',\n",
+       " '000000125936',\n",
+       " '000000389109',\n",
+       " '000000322211',\n",
+       " '000000184384',\n",
+       " '000000426329',\n",
+       " '000000128476',\n",
+       " '000000414034',\n",
+       " '000000450488',\n",
+       " '000000099182',\n",
+       " '000000051738',\n",
+       " '000000099039',\n",
+       " '000000075456',\n",
+       " '000000134882',\n",
+       " '000000442323',\n",
+       " '000000232489',\n",
+       " '000000351823',\n",
+       " '000000065736',\n",
+       " '000000001000',\n",
+       " '000000379842',\n",
+       " '000000013923',\n",
+       " '000000559543',\n",
+       " '000000185890',\n",
+       " '000000357978',\n",
+       " '000000129492',\n",
+       " '000000261097',\n",
+       " '000000410510',\n",
+       " '000000039951',\n",
+       " '000000306700',\n",
+       " '000000146457',\n",
+       " '000000214224',\n",
+       " '000000332845',\n",
+       " '000000255483',\n",
+       " '000000222455',\n",
+       " '000000187271',\n",
+       " '000000462629',\n",
+       " '000000544565',\n",
+       " '000000369771',\n",
+       " '000000035963',\n",
+       " '000000289516',\n",
+       " '000000334309',\n",
+       " '000000452084',\n",
+       " '000000301718',\n",
+       " '000000429598',\n",
+       " '000000165257',\n",
+       " '000000093437',\n",
+       " '000000413552',\n",
+       " '000000062025',\n",
+       " '000000017379',\n",
+       " '000000176778',\n",
+       " '000000104572',\n",
+       " '000000090108',\n",
+       " '000000157124',\n",
+       " '000000089556',\n",
+       " '000000266206',\n",
+       " '000000086220',\n",
+       " '000000508602',\n",
+       " ...]"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -126,7 +1135,7 @@
     "    \n",
     "    return ids\n",
     "\n",
-    "ids = get_ids(dataset, subsets[0])"
+    "get_ids(dataset, subsets[0])"
    ]
   },
   {
@@ -134,80 +1143,2098 @@
    "id": "db0e0346",
    "metadata": {},
    "source": [
-    "In VOC dataset, there are 'train' and 'test' subset. We will filter only 'train' subset."
+    "Here we apply the `reindex` transformation so that `media_id` values increment sequentially, starting from `start`."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 16,
    "id": "51bf3388",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['0',\n",
+       " '1',\n",
+       " '2',\n",
+       " '3',\n",
+       " '4',\n",
+       " '5',\n",
+       " '6',\n",
+       " '7',\n",
+       " '8',\n",
+       " '9',\n",
+       " '10',\n",
+       " '11',\n",
+       " '12',\n",
+       " '13',\n",
+       " '14',\n",
+       " '15',\n",
+       " '16',\n",
+       " '17',\n",
+       " '18',\n",
+       " '19',\n",
+       " '20',\n",
+       " '21',\n",
+       " '22',\n",
+       " '23',\n",
+       " '24',\n",
+       " '25',\n",
+       " '26',\n",
+       " '27',\n",
+       " '28',\n",
+       " '29',\n",
+       " '30',\n",
+       " '31',\n",
+       " '32',\n",
+       " '33',\n",
+       " '34',\n",
+       " '35',\n",
+       " '36',\n",
+       " '37',\n",
+       " '38',\n",
+       " '39',\n",
+       " '40',\n",
+       " '41',\n",
+       " '42',\n",
+       " '43',\n",
+       " '44',\n",
+       " '45',\n",
+       " '46',\n",
+       " '47',\n",
+       " '48',\n",
+       " '49',\n",
+       " '50',\n",
+       " '51',\n",
+       " '52',\n",
+       " '53',\n",
+       " '54',\n",
+       " '55',\n",
+       " '56',\n",
+       " '57',\n",
+       " '58',\n",
+       " '59',\n",
+       " '60',\n",
+       " '61',\n",
+       " '62',\n",
+       " '63',\n",
+       " '64',\n",
+       " '65',\n",
+       " '66',\n",
+       " '67',\n",
+       " '68',\n",
+       " '69',\n",
+       " '70',\n",
+       " '71',\n",
+       " '72',\n",
+       " '73',\n",
+       " '74',\n",
+       " '75',\n",
+       " '76',\n",
+       " '77',\n",
+       " '78',\n",
+       " '79',\n",
+       " '80',\n",
+       " '81',\n",
+       " '82',\n",
+       " '83',\n",
+       " '84',\n",
+       " '85',\n",
+       " '86',\n",
+       " '87',\n",
+       " '88',\n",
+       " '89',\n",
+       " '90',\n",
+       " '91',\n",
+       " '92',\n",
+       " '93',\n",
+       " '94',\n",
+       " '95',\n",
+       " '96',\n",
+       " '97',\n",
+       " '98',\n",
+       " '99',\n",
+       " '100',\n",
+       " '101',\n",
+       " '102',\n",
+       " '103',\n",
+       " '104',\n",
+       " '105',\n",
+       " '106',\n",
+       " '107',\n",
+       " '108',\n",
+       " '109',\n",
+       " '110',\n",
+       " '111',\n",
+       " '112',\n",
+       " '113',\n",
+       " '114',\n",
+       " '115',\n",
+       " '116',\n",
+       " '117',\n",
+       " '118',\n",
+       " '119',\n",
+       " '120',\n",
+       " '121',\n",
+       " '122',\n",
+       " '123',\n",
+       " '124',\n",
+       " '125',\n",
+       " '126',\n",
+       " '127',\n",
+       " '128',\n",
+       " '129',\n",
+       " '130',\n",
+       " '131',\n",
+       " '132',\n",
+       " '133',\n",
+       " '134',\n",
+       " '135',\n",
+       " '136',\n",
+       " '137',\n",
+       " '138',\n",
+       " '139',\n",
+       " '140',\n",
+       " '141',\n",
+       " '142',\n",
+       " '143',\n",
+       " '144',\n",
+       " '145',\n",
+       " '146',\n",
+       " '147',\n",
+       " '148',\n",
+       " '149',\n",
+       " '150',\n",
+       " '151',\n",
+       " '152',\n",
+       " '153',\n",
+       " '154',\n",
+       " '155',\n",
+       " '156',\n",
+       " '157',\n",
+       " '158',\n",
+       " '159',\n",
+       " '160',\n",
+       " '161',\n",
+       " '162',\n",
+       " '163',\n",
+       " '164',\n",
+       " '165',\n",
+       " '166',\n",
+       " '167',\n",
+       " '168',\n",
+       " '169',\n",
+       " '170',\n",
+       " '171',\n",
+       " '172',\n",
+       " '173',\n",
+       " '174',\n",
+       " '175',\n",
+       " '176',\n",
+       " '177',\n",
+       " '178',\n",
+       " '179',\n",
+       " '180',\n",
+       " '181',\n",
+       " '182',\n",
+       " '183',\n",
+       " '184',\n",
+       " '185',\n",
+       " '186',\n",
+       " '187',\n",
+       " '188',\n",
+       " '189',\n",
+       " '190',\n",
+       " '191',\n",
+       " '192',\n",
+       " '193',\n",
+       " '194',\n",
+       " '195',\n",
+       " '196',\n",
+       " '197',\n",
+       " '198',\n",
+       " '199',\n",
+       " '200',\n",
+       " '201',\n",
+       " '202',\n",
+       " '203',\n",
+       " '204',\n",
+       " '205',\n",
+       " '206',\n",
+       " '207',\n",
+       " '208',\n",
+       " '209',\n",
+       " '210',\n",
+       " '211',\n",
+       " '212',\n",
+       " '213',\n",
+       " '214',\n",
+       " '215',\n",
+       " '216',\n",
+       " '217',\n",
+       " '218',\n",
+       " '219',\n",
+       " '220',\n",
+       " '221',\n",
+       " '222',\n",
+       " '223',\n",
+       " '224',\n",
+       " '225',\n",
+       " '226',\n",
+       " '227',\n",
+       " '228',\n",
+       " '229',\n",
+       " '230',\n",
+       " '231',\n",
+       " '232',\n",
+       " '233',\n",
+       " '234',\n",
+       " '235',\n",
+       " '236',\n",
+       " '237',\n",
+       " '238',\n",
+       " '239',\n",
+       " '240',\n",
+       " '241',\n",
+       " '242',\n",
+       " '243',\n",
+       " '244',\n",
+       " '245',\n",
+       " '246',\n",
+       " '247',\n",
+       " '248',\n",
+       " '249',\n",
+       " '250',\n",
+       " '251',\n",
+       " '252',\n",
+       " '253',\n",
+       " '254',\n",
+       " '255',\n",
+       " '256',\n",
+       " '257',\n",
+       " '258',\n",
+       " '259',\n",
+       " '260',\n",
+       " '261',\n",
+       " '262',\n",
+       " '263',\n",
+       " '264',\n",
+       " '265',\n",
+       " '266',\n",
+       " '267',\n",
+       " '268',\n",
+       " '269',\n",
+       " '270',\n",
+       " '271',\n",
+       " '272',\n",
+       " '273',\n",
+       " '274',\n",
+       " '275',\n",
+       " '276',\n",
+       " '277',\n",
+       " '278',\n",
+       " '279',\n",
+       " '280',\n",
+       " '281',\n",
+       " '282',\n",
+       " '283',\n",
+       " '284',\n",
+       " '285',\n",
+       " '286',\n",
+       " '287',\n",
+       " '288',\n",
+       " '289',\n",
+       " '290',\n",
+       " '291',\n",
+       " '292',\n",
+       " '293',\n",
+       " '294',\n",
+       " '295',\n",
+       " '296',\n",
+       " '297',\n",
+       " '298',\n",
+       " '299',\n",
+       " '300',\n",
+       " '301',\n",
+       " '302',\n",
+       " '303',\n",
+       " '304',\n",
+       " '305',\n",
+       " '306',\n",
+       " '307',\n",
+       " '308',\n",
+       " '309',\n",
+       " '310',\n",
+       " '311',\n",
+       " '312',\n",
+       " '313',\n",
+       " '314',\n",
+       " '315',\n",
+       " '316',\n",
+       " '317',\n",
+       " '318',\n",
+       " '319',\n",
+       " '320',\n",
+       " '321',\n",
+       " '322',\n",
+       " '323',\n",
+       " '324',\n",
+       " '325',\n",
+       " '326',\n",
+       " '327',\n",
+       " '328',\n",
+       " '329',\n",
+       " '330',\n",
+       " '331',\n",
+       " '332',\n",
+       " '333',\n",
+       " '334',\n",
+       " '335',\n",
+       " '336',\n",
+       " '337',\n",
+       " '338',\n",
+       " '339',\n",
+       " '340',\n",
+       " '341',\n",
+       " '342',\n",
+       " '343',\n",
+       " '344',\n",
+       " '345',\n",
+       " '346',\n",
+       " '347',\n",
+       " '348',\n",
+       " '349',\n",
+       " '350',\n",
+       " '351',\n",
+       " '352',\n",
+       " '353',\n",
+       " '354',\n",
+       " '355',\n",
+       " '356',\n",
+       " '357',\n",
+       " '358',\n",
+       " '359',\n",
+       " '360',\n",
+       " '361',\n",
+       " '362',\n",
+       " '363',\n",
+       " '364',\n",
+       " '365',\n",
+       " '366',\n",
+       " '367',\n",
+       " '368',\n",
+       " '369',\n",
+       " '370',\n",
+       " '371',\n",
+       " '372',\n",
+       " '373',\n",
+       " '374',\n",
+       " '375',\n",
+       " '376',\n",
+       " '377',\n",
+       " '378',\n",
+       " '379',\n",
+       " '380',\n",
+       " '381',\n",
+       " '382',\n",
+       " '383',\n",
+       " '384',\n",
+       " '385',\n",
+       " '386',\n",
+       " '387',\n",
+       " '388',\n",
+       " '389',\n",
+       " '390',\n",
+       " '391',\n",
+       " '392',\n",
+       " '393',\n",
+       " '394',\n",
+       " '395',\n",
+       " '396',\n",
+       " '397',\n",
+       " '398',\n",
+       " '399',\n",
+       " '400',\n",
+       " '401',\n",
+       " '402',\n",
+       " '403',\n",
+       " '404',\n",
+       " '405',\n",
+       " '406',\n",
+       " '407',\n",
+       " '408',\n",
+       " '409',\n",
+       " '410',\n",
+       " '411',\n",
+       " '412',\n",
+       " '413',\n",
+       " '414',\n",
+       " '415',\n",
+       " '416',\n",
+       " '417',\n",
+       " '418',\n",
+       " '419',\n",
+       " '420',\n",
+       " '421',\n",
+       " '422',\n",
+       " '423',\n",
+       " '424',\n",
+       " '425',\n",
+       " '426',\n",
+       " '427',\n",
+       " '428',\n",
+       " '429',\n",
+       " '430',\n",
+       " '431',\n",
+       " '432',\n",
+       " '433',\n",
+       " '434',\n",
+       " '435',\n",
+       " '436',\n",
+       " '437',\n",
+       " '438',\n",
+       " '439',\n",
+       " '440',\n",
+       " '441',\n",
+       " '442',\n",
+       " '443',\n",
+       " '444',\n",
+       " '445',\n",
+       " '446',\n",
+       " '447',\n",
+       " '448',\n",
+       " '449',\n",
+       " '450',\n",
+       " '451',\n",
+       " '452',\n",
+       " '453',\n",
+       " '454',\n",
+       " '455',\n",
+       " '456',\n",
+       " '457',\n",
+       " '458',\n",
+       " '459',\n",
+       " '460',\n",
+       " '461',\n",
+       " '462',\n",
+       " '463',\n",
+       " '464',\n",
+       " '465',\n",
+       " '466',\n",
+       " '467',\n",
+       " '468',\n",
+       " '469',\n",
+       " '470',\n",
+       " '471',\n",
+       " '472',\n",
+       " '473',\n",
+       " '474',\n",
+       " '475',\n",
+       " '476',\n",
+       " '477',\n",
+       " '478',\n",
+       " '479',\n",
+       " '480',\n",
+       " '481',\n",
+       " '482',\n",
+       " '483',\n",
+       " '484',\n",
+       " '485',\n",
+       " '486',\n",
+       " '487',\n",
+       " '488',\n",
+       " '489',\n",
+       " '490',\n",
+       " '491',\n",
+       " '492',\n",
+       " '493',\n",
+       " '494',\n",
+       " '495',\n",
+       " '496',\n",
+       " '497',\n",
+       " '498',\n",
+       " '499',\n",
+       " '500',\n",
+       " '501',\n",
+       " '502',\n",
+       " '503',\n",
+       " '504',\n",
+       " '505',\n",
+       " '506',\n",
+       " '507',\n",
+       " '508',\n",
+       " '509',\n",
+       " '510',\n",
+       " '511',\n",
+       " '512',\n",
+       " '513',\n",
+       " '514',\n",
+       " '515',\n",
+       " '516',\n",
+       " '517',\n",
+       " '518',\n",
+       " '519',\n",
+       " '520',\n",
+       " '521',\n",
+       " '522',\n",
+       " '523',\n",
+       " '524',\n",
+       " '525',\n",
+       " '526',\n",
+       " '527',\n",
+       " '528',\n",
+       " '529',\n",
+       " '530',\n",
+       " '531',\n",
+       " '532',\n",
+       " '533',\n",
+       " '534',\n",
+       " '535',\n",
+       " '536',\n",
+       " '537',\n",
+       " '538',\n",
+       " '539',\n",
+       " '540',\n",
+       " '541',\n",
+       " '542',\n",
+       " '543',\n",
+       " '544',\n",
+       " '545',\n",
+       " '546',\n",
+       " '547',\n",
+       " '548',\n",
+       " '549',\n",
+       " '550',\n",
+       " '551',\n",
+       " '552',\n",
+       " '553',\n",
+       " '554',\n",
+       " '555',\n",
+       " '556',\n",
+       " '557',\n",
+       " '558',\n",
+       " '559',\n",
+       " '560',\n",
+       " '561',\n",
+       " '562',\n",
+       " '563',\n",
+       " '564',\n",
+       " '565',\n",
+       " '566',\n",
+       " '567',\n",
+       " '568',\n",
+       " '569',\n",
+       " '570',\n",
+       " '571',\n",
+       " '572',\n",
+       " '573',\n",
+       " '574',\n",
+       " '575',\n",
+       " '576',\n",
+       " '577',\n",
+       " '578',\n",
+       " '579',\n",
+       " '580',\n",
+       " '581',\n",
+       " '582',\n",
+       " '583',\n",
+       " '584',\n",
+       " '585',\n",
+       " '586',\n",
+       " '587',\n",
+       " '588',\n",
+       " '589',\n",
+       " '590',\n",
+       " '591',\n",
+       " '592',\n",
+       " '593',\n",
+       " '594',\n",
+       " '595',\n",
+       " '596',\n",
+       " '597',\n",
+       " '598',\n",
+       " '599',\n",
+       " '600',\n",
+       " '601',\n",
+       " '602',\n",
+       " '603',\n",
+       " '604',\n",
+       " '605',\n",
+       " '606',\n",
+       " '607',\n",
+       " '608',\n",
+       " '609',\n",
+       " '610',\n",
+       " '611',\n",
+       " '612',\n",
+       " '613',\n",
+       " '614',\n",
+       " '615',\n",
+       " '616',\n",
+       " '617',\n",
+       " '618',\n",
+       " '619',\n",
+       " '620',\n",
+       " '621',\n",
+       " '622',\n",
+       " '623',\n",
+       " '624',\n",
+       " '625',\n",
+       " '626',\n",
+       " '627',\n",
+       " '628',\n",
+       " '629',\n",
+       " '630',\n",
+       " '631',\n",
+       " '632',\n",
+       " '633',\n",
+       " '634',\n",
+       " '635',\n",
+       " '636',\n",
+       " '637',\n",
+       " '638',\n",
+       " '639',\n",
+       " '640',\n",
+       " '641',\n",
+       " '642',\n",
+       " '643',\n",
+       " '644',\n",
+       " '645',\n",
+       " '646',\n",
+       " '647',\n",
+       " '648',\n",
+       " '649',\n",
+       " '650',\n",
+       " '651',\n",
+       " '652',\n",
+       " '653',\n",
+       " '654',\n",
+       " '655',\n",
+       " '656',\n",
+       " '657',\n",
+       " '658',\n",
+       " '659',\n",
+       " '660',\n",
+       " '661',\n",
+       " '662',\n",
+       " '663',\n",
+       " '664',\n",
+       " '665',\n",
+       " '666',\n",
+       " '667',\n",
+       " '668',\n",
+       " '669',\n",
+       " '670',\n",
+       " '671',\n",
+       " '672',\n",
+       " '673',\n",
+       " '674',\n",
+       " '675',\n",
+       " '676',\n",
+       " '677',\n",
+       " '678',\n",
+       " '679',\n",
+       " '680',\n",
+       " '681',\n",
+       " '682',\n",
+       " '683',\n",
+       " '684',\n",
+       " '685',\n",
+       " '686',\n",
+       " '687',\n",
+       " '688',\n",
+       " '689',\n",
+       " '690',\n",
+       " '691',\n",
+       " '692',\n",
+       " '693',\n",
+       " '694',\n",
+       " '695',\n",
+       " '696',\n",
+       " '697',\n",
+       " '698',\n",
+       " '699',\n",
+       " '700',\n",
+       " '701',\n",
+       " '702',\n",
+       " '703',\n",
+       " '704',\n",
+       " '705',\n",
+       " '706',\n",
+       " '707',\n",
+       " '708',\n",
+       " '709',\n",
+       " '710',\n",
+       " '711',\n",
+       " '712',\n",
+       " '713',\n",
+       " '714',\n",
+       " '715',\n",
+       " '716',\n",
+       " '717',\n",
+       " '718',\n",
+       " '719',\n",
+       " '720',\n",
+       " '721',\n",
+       " '722',\n",
+       " '723',\n",
+       " '724',\n",
+       " '725',\n",
+       " '726',\n",
+       " '727',\n",
+       " '728',\n",
+       " '729',\n",
+       " '730',\n",
+       " '731',\n",
+       " '732',\n",
+       " '733',\n",
+       " '734',\n",
+       " '735',\n",
+       " '736',\n",
+       " '737',\n",
+       " '738',\n",
+       " '739',\n",
+       " '740',\n",
+       " '741',\n",
+       " '742',\n",
+       " '743',\n",
+       " '744',\n",
+       " '745',\n",
+       " '746',\n",
+       " '747',\n",
+       " '748',\n",
+       " '749',\n",
+       " '750',\n",
+       " '751',\n",
+       " '752',\n",
+       " '753',\n",
+       " '754',\n",
+       " '755',\n",
+       " '756',\n",
+       " '757',\n",
+       " '758',\n",
+       " '759',\n",
+       " '760',\n",
+       " '761',\n",
+       " '762',\n",
+       " '763',\n",
+       " '764',\n",
+       " '765',\n",
+       " '766',\n",
+       " '767',\n",
+       " '768',\n",
+       " '769',\n",
+       " '770',\n",
+       " '771',\n",
+       " '772',\n",
+       " '773',\n",
+       " '774',\n",
+       " '775',\n",
+       " '776',\n",
+       " '777',\n",
+       " '778',\n",
+       " '779',\n",
+       " '780',\n",
+       " '781',\n",
+       " '782',\n",
+       " '783',\n",
+       " '784',\n",
+       " '785',\n",
+       " '786',\n",
+       " '787',\n",
+       " '788',\n",
+       " '789',\n",
+       " '790',\n",
+       " '791',\n",
+       " '792',\n",
+       " '793',\n",
+       " '794',\n",
+       " '795',\n",
+       " '796',\n",
+       " '797',\n",
+       " '798',\n",
+       " '799',\n",
+       " '800',\n",
+       " '801',\n",
+       " '802',\n",
+       " '803',\n",
+       " '804',\n",
+       " '805',\n",
+       " '806',\n",
+       " '807',\n",
+       " '808',\n",
+       " '809',\n",
+       " '810',\n",
+       " '811',\n",
+       " '812',\n",
+       " '813',\n",
+       " '814',\n",
+       " '815',\n",
+       " '816',\n",
+       " '817',\n",
+       " '818',\n",
+       " '819',\n",
+       " '820',\n",
+       " '821',\n",
+       " '822',\n",
+       " '823',\n",
+       " '824',\n",
+       " '825',\n",
+       " '826',\n",
+       " '827',\n",
+       " '828',\n",
+       " '829',\n",
+       " '830',\n",
+       " '831',\n",
+       " '832',\n",
+       " '833',\n",
+       " '834',\n",
+       " '835',\n",
+       " '836',\n",
+       " '837',\n",
+       " '838',\n",
+       " '839',\n",
+       " '840',\n",
+       " '841',\n",
+       " '842',\n",
+       " '843',\n",
+       " '844',\n",
+       " '845',\n",
+       " '846',\n",
+       " '847',\n",
+       " '848',\n",
+       " '849',\n",
+       " '850',\n",
+       " '851',\n",
+       " '852',\n",
+       " '853',\n",
+       " '854',\n",
+       " '855',\n",
+       " '856',\n",
+       " '857',\n",
+       " '858',\n",
+       " '859',\n",
+       " '860',\n",
+       " '861',\n",
+       " '862',\n",
+       " '863',\n",
+       " '864',\n",
+       " '865',\n",
+       " '866',\n",
+       " '867',\n",
+       " '868',\n",
+       " '869',\n",
+       " '870',\n",
+       " '871',\n",
+       " '872',\n",
+       " '873',\n",
+       " '874',\n",
+       " '875',\n",
+       " '876',\n",
+       " '877',\n",
+       " '878',\n",
+       " '879',\n",
+       " '880',\n",
+       " '881',\n",
+       " '882',\n",
+       " '883',\n",
+       " '884',\n",
+       " '885',\n",
+       " '886',\n",
+       " '887',\n",
+       " '888',\n",
+       " '889',\n",
+       " '890',\n",
+       " '891',\n",
+       " '892',\n",
+       " '893',\n",
+       " '894',\n",
+       " '895',\n",
+       " '896',\n",
+       " '897',\n",
+       " '898',\n",
+       " '899',\n",
+       " '900',\n",
+       " '901',\n",
+       " '902',\n",
+       " '903',\n",
+       " '904',\n",
+       " '905',\n",
+       " '906',\n",
+       " '907',\n",
+       " '908',\n",
+       " '909',\n",
+       " '910',\n",
+       " '911',\n",
+       " '912',\n",
+       " '913',\n",
+       " '914',\n",
+       " '915',\n",
+       " '916',\n",
+       " '917',\n",
+       " '918',\n",
+       " '919',\n",
+       " '920',\n",
+       " '921',\n",
+       " '922',\n",
+       " '923',\n",
+       " '924',\n",
+       " '925',\n",
+       " '926',\n",
+       " '927',\n",
+       " '928',\n",
+       " '929',\n",
+       " '930',\n",
+       " '931',\n",
+       " '932',\n",
+       " '933',\n",
+       " '934',\n",
+       " '935',\n",
+       " '936',\n",
+       " '937',\n",
+       " '938',\n",
+       " '939',\n",
+       " '940',\n",
+       " '941',\n",
+       " '942',\n",
+       " '943',\n",
+       " '944',\n",
+       " '945',\n",
+       " '946',\n",
+       " '947',\n",
+       " '948',\n",
+       " '949',\n",
+       " '950',\n",
+       " '951',\n",
+       " '952',\n",
+       " '953',\n",
+       " '954',\n",
+       " '955',\n",
+       " '956',\n",
+       " '957',\n",
+       " '958',\n",
+       " '959',\n",
+       " '960',\n",
+       " '961',\n",
+       " '962',\n",
+       " '963',\n",
+       " '964',\n",
+       " '965',\n",
+       " '966',\n",
+       " '967',\n",
+       " '968',\n",
+       " '969',\n",
+       " '970',\n",
+       " '971',\n",
+       " '972',\n",
+       " '973',\n",
+       " '974',\n",
+       " '975',\n",
+       " '976',\n",
+       " '977',\n",
+       " '978',\n",
+       " '979',\n",
+       " '980',\n",
+       " '981',\n",
+       " '982',\n",
+       " '983',\n",
+       " '984',\n",
+       " '985',\n",
+       " '986',\n",
+       " '987',\n",
+       " '988',\n",
+       " '989',\n",
+       " '990',\n",
+       " '991',\n",
+       " '992',\n",
+       " '993',\n",
+       " '994',\n",
+       " '995',\n",
+       " '996',\n",
+       " '997',\n",
+       " '998',\n",
+       " '999',\n",
+       " ...]"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# dataset.transform(\"masks_to_polygons\")\n",
     "reindexing_dataset = dataset.transform(\"reindex\", start=0)\n",
-    "\n",
-    "ids = get_ids(reindexing_dataset, subsets[0])"
+    "get_ids(reindexing_dataset, subsets[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a77fbadc",
+   "metadata": {},
+   "source": [
+    "By applying the `id_from_image_name` transformation, we can roll back the `media_id` to the media name."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 17,
    "id": "fb608396",
    "metadata": {
     "scrolled": true
    },
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataset\n",
-      "\tsize=123287\n",
-      "\tsource_path=coco_dataset\n",
-      "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
-      "\tannotated_items_count=122218\n",
-      "\tannotations_count=1018861\n",
-      "subsets\n",
-      "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
-      "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
-      "categories\n",
-      "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
-      "\n"
-     ]
+     "data": {
+      "text/plain": [
+       "['000000397133',\n",
+       " '000000037777',\n",
+       " '000000252219',\n",
+       " '000000087038',\n",
+       " '000000174482',\n",
+       " '000000403385',\n",
+       " '000000006818',\n",
+       " '000000480985',\n",
+       " '000000458054',\n",
+       " '000000331352',\n",
+       " '000000296649',\n",
+       " '000000386912',\n",
+       " '000000502136',\n",
+       " '000000491497',\n",
+       " '000000184791',\n",
+       " '000000348881',\n",
+       " '000000289393',\n",
+       " '000000522713',\n",
+       " '000000181666',\n",
+       " '000000017627',\n",
+       " '000000143931',\n",
+       " '000000303818',\n",
+       " '000000463730',\n",
+       " '000000460347',\n",
+       " '000000322864',\n",
+       " '000000226111',\n",
+       " '000000153299',\n",
+       " '000000308394',\n",
+       " '000000456496',\n",
+       " '000000058636',\n",
+       " '000000041888',\n",
+       " '000000184321',\n",
+       " '000000565778',\n",
+       " '000000297343',\n",
+       " '000000336587',\n",
+       " '000000122745',\n",
+       " '000000219578',\n",
+       " '000000555705',\n",
+       " '000000443303',\n",
+       " '000000500663',\n",
+       " '000000418281',\n",
+       " '000000025560',\n",
+       " '000000403817',\n",
+       " '000000085329',\n",
+       " '000000329323',\n",
+       " '000000239274',\n",
+       " '000000286994',\n",
+       " '000000511321',\n",
+       " '000000314294',\n",
+       " '000000233771',\n",
+       " '000000475779',\n",
+       " '000000301867',\n",
+       " '000000312421',\n",
+       " '000000185250',\n",
+       " '000000356427',\n",
+       " '000000572517',\n",
+       " '000000270244',\n",
+       " '000000516316',\n",
+       " '000000125211',\n",
+       " '000000562121',\n",
+       " '000000360661',\n",
+       " '000000016228',\n",
+       " '000000382088',\n",
+       " '000000266409',\n",
+       " '000000430961',\n",
+       " '000000080671',\n",
+       " '000000577539',\n",
+       " '000000104612',\n",
+       " '000000476258',\n",
+       " '000000448365',\n",
+       " '000000035197',\n",
+       " '000000349860',\n",
+       " '000000180135',\n",
+       " '000000486438',\n",
+       " '000000400573',\n",
+       " '000000109798',\n",
+       " '000000370677',\n",
+       " '000000238866',\n",
+       " '000000369370',\n",
+       " '000000502737',\n",
+       " '000000515579',\n",
+       " '000000515445',\n",
+       " '000000173383',\n",
+       " '000000438862',\n",
+       " '000000180560',\n",
+       " '000000347693',\n",
+       " '000000039956',\n",
+       " '000000321214',\n",
+       " '000000474028',\n",
+       " '000000066523',\n",
+       " '000000355257',\n",
+       " '000000142092',\n",
+       " '000000063154',\n",
+       " '000000199551',\n",
+       " '000000239347',\n",
+       " '000000514508',\n",
+       " '000000473237',\n",
+       " '000000228144',\n",
+       " '000000206027',\n",
+       " '000000078915',\n",
+       " '000000551215',\n",
+       " '000000544519',\n",
+       " '000000096493',\n",
+       " '000000023899',\n",
+       " '000000340175',\n",
+       " '000000578500',\n",
+       " '000000366141',\n",
+       " '000000057597',\n",
+       " '000000559842',\n",
+       " '000000434230',\n",
+       " '000000428454',\n",
+       " '000000399462',\n",
+       " '000000261061',\n",
+       " '000000168330',\n",
+       " '000000383384',\n",
+       " '000000342006',\n",
+       " '000000217285',\n",
+       " '000000236412',\n",
+       " '000000524456',\n",
+       " '000000153343',\n",
+       " '000000095786',\n",
+       " '000000326541',\n",
+       " '000000213086',\n",
+       " '000000231339',\n",
+       " '000000508730',\n",
+       " '000000550426',\n",
+       " '000000368294',\n",
+       " '000000171190',\n",
+       " '000000301135',\n",
+       " '000000580294',\n",
+       " '000000494869',\n",
+       " '000000033638',\n",
+       " '000000329219',\n",
+       " '000000034873',\n",
+       " '000000186980',\n",
+       " '000000127182',\n",
+       " '000000356387',\n",
+       " '000000367680',\n",
+       " '000000263796',\n",
+       " '000000117425',\n",
+       " '000000365387',\n",
+       " '000000487583',\n",
+       " '000000504711',\n",
+       " '000000363840',\n",
+       " '000000214720',\n",
+       " '000000379453',\n",
+       " '000000311295',\n",
+       " '000000029393',\n",
+       " '000000278848',\n",
+       " '000000166391',\n",
+       " '000000048153',\n",
+       " '000000459153',\n",
+       " '000000295713',\n",
+       " '000000223130',\n",
+       " '000000273132',\n",
+       " '000000198960',\n",
+       " '000000344059',\n",
+       " '000000410428',\n",
+       " '000000087875',\n",
+       " '000000450758',\n",
+       " '000000458790',\n",
+       " '000000460160',\n",
+       " '000000458109',\n",
+       " '000000030675',\n",
+       " '000000566524',\n",
+       " '000000338428',\n",
+       " '000000545826',\n",
+       " '000000166277',\n",
+       " '000000269314',\n",
+       " '000000476415',\n",
+       " '000000292082',\n",
+       " '000000360137',\n",
+       " '000000122046',\n",
+       " '000000352684',\n",
+       " '000000512836',\n",
+       " '000000008021',\n",
+       " '000000107226',\n",
+       " '000000084477',\n",
+       " '000000562243',\n",
+       " '000000181859',\n",
+       " '000000177015',\n",
+       " '000000292236',\n",
+       " '000000121506',\n",
+       " '000000288042',\n",
+       " '000000453860',\n",
+       " '000000500257',\n",
+       " '000000113403',\n",
+       " '000000125062',\n",
+       " '000000375015',\n",
+       " '000000334719',\n",
+       " '000000134112',\n",
+       " '000000283520',\n",
+       " '000000031269',\n",
+       " '000000319721',\n",
+       " '000000165351',\n",
+       " '000000347265',\n",
+       " '000000414170',\n",
+       " '000000231508',\n",
+       " '000000389381',\n",
+       " '000000118921',\n",
+       " '000000021503',\n",
+       " '000000000785',\n",
+       " '000000300842',\n",
+       " '000000105014',\n",
+       " '000000261982',\n",
+       " '000000034205',\n",
+       " '000000099242',\n",
+       " '000000314709',\n",
+       " '000000460494',\n",
+       " '000000339442',\n",
+       " '000000541055',\n",
+       " '000000409475',\n",
+       " '000000464786',\n",
+       " '000000378605',\n",
+       " '000000331817',\n",
+       " '000000218091',\n",
+       " '000000578545',\n",
+       " '000000363207',\n",
+       " '000000372577',\n",
+       " '000000212166',\n",
+       " '000000172571',\n",
+       " '000000294831',\n",
+       " '000000084431',\n",
+       " '000000323355',\n",
+       " '000000355325',\n",
+       " '000000100582',\n",
+       " '000000555412',\n",
+       " '000000004495',\n",
+       " '000000009483',\n",
+       " '000000326082',\n",
+       " '000000398237',\n",
+       " '000000507223',\n",
+       " '000000031050',\n",
+       " '000000239537',\n",
+       " '000000340930',\n",
+       " '000000011813',\n",
+       " '000000281414',\n",
+       " '000000537991',\n",
+       " '000000284282',\n",
+       " '000000321333',\n",
+       " '000000521282',\n",
+       " '000000108026',\n",
+       " '000000243204',\n",
+       " '000000177935',\n",
+       " '000000038829',\n",
+       " '000000397327',\n",
+       " '000000501523',\n",
+       " '000000555050',\n",
+       " '000000376442',\n",
+       " '000000187243',\n",
+       " '000000356347',\n",
+       " '000000293044',\n",
+       " '000000560279',\n",
+       " '000000042276',\n",
+       " '000000534827',\n",
+       " '000000190756',\n",
+       " '000000482917',\n",
+       " '000000300659',\n",
+       " '000000199977',\n",
+       " '000000442480',\n",
+       " '000000384350',\n",
+       " '000000383621',\n",
+       " '000000189828',\n",
+       " '000000412894',\n",
+       " '000000537153',\n",
+       " '000000361103',\n",
+       " '000000392722',\n",
+       " '000000338560',\n",
+       " '000000264535',\n",
+       " '000000295231',\n",
+       " '000000154947',\n",
+       " '000000212559',\n",
+       " '000000458755',\n",
+       " '000000104782',\n",
+       " '000000315257',\n",
+       " '000000130599',\n",
+       " '000000227187',\n",
+       " '000000151662',\n",
+       " '000000461275',\n",
+       " '000000523811',\n",
+       " '000000456559',\n",
+       " '000000101068',\n",
+       " '000000140640',\n",
+       " '000000516708',\n",
+       " '000000544605',\n",
+       " '000000385190',\n",
+       " '000000338986',\n",
+       " '000000053994',\n",
+       " '000000061171',\n",
+       " '000000314034',\n",
+       " '000000291490',\n",
+       " '000000152740',\n",
+       " '000000024919',\n",
+       " '000000079837',\n",
+       " '000000021903',\n",
+       " '000000564133',\n",
+       " '000000337055',\n",
+       " '000000110638',\n",
+       " '000000034139',\n",
+       " '000000080340',\n",
+       " '000000083113',\n",
+       " '000000173033',\n",
+       " '000000255664',\n",
+       " '000000072813',\n",
+       " '000000545129',\n",
+       " '000000546011',\n",
+       " '000000121031',\n",
+       " '000000172547',\n",
+       " '000000369081',\n",
+       " '000000509131',\n",
+       " '000000578922',\n",
+       " '000000464089',\n",
+       " '000000453708',\n",
+       " '000000177714',\n",
+       " '000000459887',\n",
+       " '000000155179',\n",
+       " '000000261116',\n",
+       " '000000396274',\n",
+       " '000000029640',\n",
+       " '000000141328',\n",
+       " '000000308430',\n",
+       " '000000043314',\n",
+       " '000000273715',\n",
+       " '000000456303',\n",
+       " '000000406611',\n",
+       " '000000475064',\n",
+       " '000000466567',\n",
+       " '000000137246',\n",
+       " '000000015079',\n",
+       " '000000296284',\n",
+       " '000000226147',\n",
+       " '000000226903',\n",
+       " '000000127517',\n",
+       " '000000162092',\n",
+       " '000000131379',\n",
+       " '000000366611',\n",
+       " '000000263969',\n",
+       " '000000551439',\n",
+       " '000000474167',\n",
+       " '000000159458',\n",
+       " '000000554735',\n",
+       " '000000099428',\n",
+       " '000000386352',\n",
+       " '000000173004',\n",
+       " '000000311394',\n",
+       " '000000578489',\n",
+       " '000000189310',\n",
+       " '000000491366',\n",
+       " '000000448076',\n",
+       " '000000293804',\n",
+       " '000000312237',\n",
+       " '000000221291',\n",
+       " '000000141821',\n",
+       " '000000410650',\n",
+       " '000000199310',\n",
+       " '000000323151',\n",
+       " '000000089648',\n",
+       " '000000219283',\n",
+       " '000000471869',\n",
+       " '000000520264',\n",
+       " '000000111179',\n",
+       " '000000151000',\n",
+       " '000000100624',\n",
+       " '000000332570',\n",
+       " '000000057238',\n",
+       " '000000502732',\n",
+       " '000000135561',\n",
+       " '000000008277',\n",
+       " '000000173044',\n",
+       " '000000168458',\n",
+       " '000000512194',\n",
+       " '000000370042',\n",
+       " '000000189436',\n",
+       " '000000533958',\n",
+       " '000000117645',\n",
+       " '000000221708',\n",
+       " '000000202228',\n",
+       " '000000403565',\n",
+       " '000000211042',\n",
+       " '000000492878',\n",
+       " '000000441586',\n",
+       " '000000547816',\n",
+       " '000000306733',\n",
+       " '000000530099',\n",
+       " '000000312278',\n",
+       " '000000097679',\n",
+       " '000000564127',\n",
+       " '000000251065',\n",
+       " '000000003845',\n",
+       " '000000138819',\n",
+       " '000000205834',\n",
+       " '000000348708',\n",
+       " '000000166521',\n",
+       " '000000485802',\n",
+       " '000000099054',\n",
+       " '000000022969',\n",
+       " '000000570539',\n",
+       " '000000278353',\n",
+       " '000000158548',\n",
+       " '000000461405',\n",
+       " '000000176606',\n",
+       " '000000044699',\n",
+       " '000000559956',\n",
+       " '000000268996',\n",
+       " '000000011197',\n",
+       " '000000483667',\n",
+       " '000000448810',\n",
+       " '000000000724',\n",
+       " '000000051961',\n",
+       " '000000375278',\n",
+       " '000000302165',\n",
+       " '000000131131',\n",
+       " '000000098839',\n",
+       " '000000402992',\n",
+       " '000000465675',\n",
+       " '000000240754',\n",
+       " '000000021167',\n",
+       " '000000148730',\n",
+       " '000000384468',\n",
+       " '000000253742',\n",
+       " '000000186873',\n",
+       " '000000082180',\n",
+       " '000000446522',\n",
+       " '000000552902',\n",
+       " '000000125405',\n",
+       " '000000110211',\n",
+       " '000000016010',\n",
+       " '000000064462',\n",
+       " '000000314182',\n",
+       " '000000248980',\n",
+       " '000000068387',\n",
+       " '000000429281',\n",
+       " '000000345466',\n",
+       " '000000352900',\n",
+       " '000000118367',\n",
+       " '000000113235',\n",
+       " '000000311303',\n",
+       " '000000163640',\n",
+       " '000000370999',\n",
+       " '000000001490',\n",
+       " '000000329456',\n",
+       " '000000570471',\n",
+       " '000000088269',\n",
+       " '000000260470',\n",
+       " '000000193494',\n",
+       " '000000252776',\n",
+       " '000000201072',\n",
+       " '000000018150',\n",
+       " '000000337498',\n",
+       " '000000521405',\n",
+       " '000000518770',\n",
+       " '000000201646',\n",
+       " '000000036936',\n",
+       " '000000059044',\n",
+       " '000000172946',\n",
+       " '000000234607',\n",
+       " '000000532690',\n",
+       " '000000323895',\n",
+       " '000000384670',\n",
+       " '000000050326',\n",
+       " '000000205542',\n",
+       " '000000217957',\n",
+       " '000000162035',\n",
+       " '000000415727',\n",
+       " '000000046252',\n",
+       " '000000182021',\n",
+       " '000000231747',\n",
+       " '000000090284',\n",
+       " '000000286553',\n",
+       " '000000488736',\n",
+       " '000000063602',\n",
+       " '000000383386',\n",
+       " '000000450686',\n",
+       " '000000005060',\n",
+       " '000000286523',\n",
+       " '000000120420',\n",
+       " '000000579655',\n",
+       " '000000117908',\n",
+       " '000000550322',\n",
+       " '000000322844',\n",
+       " '000000218362',\n",
+       " '000000213224',\n",
+       " '000000223747',\n",
+       " '000000297578',\n",
+       " '000000458992',\n",
+       " '000000078266',\n",
+       " '000000164602',\n",
+       " '000000440475',\n",
+       " '000000101762',\n",
+       " '000000557501',\n",
+       " '000000203317',\n",
+       " '000000368940',\n",
+       " '000000569917',\n",
+       " '000000144798',\n",
+       " '000000284623',\n",
+       " '000000520301',\n",
+       " '000000127987',\n",
+       " '000000063740',\n",
+       " '000000036494',\n",
+       " '000000210032',\n",
+       " '000000488270',\n",
+       " '000000067180',\n",
+       " '000000281179',\n",
+       " '000000064359',\n",
+       " '000000126226',\n",
+       " '000000190923',\n",
+       " '000000150265',\n",
+       " '000000216739',\n",
+       " '000000038048',\n",
+       " '000000354829',\n",
+       " '000000525155',\n",
+       " '000000163314',\n",
+       " '000000259571',\n",
+       " '000000561679',\n",
+       " '000000236166',\n",
+       " '000000153529',\n",
+       " '000000473015',\n",
+       " '000000379800',\n",
+       " '000000253835',\n",
+       " '000000034071',\n",
+       " '000000036861',\n",
+       " '000000569565',\n",
+       " '000000219271',\n",
+       " '000000205647',\n",
+       " '000000460841',\n",
+       " '000000123131',\n",
+       " '000000334006',\n",
+       " '000000511599',\n",
+       " '000000229858',\n",
+       " '000000174004',\n",
+       " '000000519764',\n",
+       " '000000137576',\n",
+       " '000000087470',\n",
+       " '000000009769',\n",
+       " '000000558114',\n",
+       " '000000205776',\n",
+       " '000000163257',\n",
+       " '000000475678',\n",
+       " '000000085478',\n",
+       " '000000318080',\n",
+       " '000000361551',\n",
+       " '000000236784',\n",
+       " '000000092839',\n",
+       " '000000042296',\n",
+       " '000000560266',\n",
+       " '000000486479',\n",
+       " '000000127955',\n",
+       " '000000307658',\n",
+       " '000000417465',\n",
+       " '000000342971',\n",
+       " '000000011760',\n",
+       " '000000069106',\n",
+       " '000000070158',\n",
+       " '000000176634',\n",
+       " '000000281447',\n",
+       " '000000552371',\n",
+       " '000000361919',\n",
+       " '000000560256',\n",
+       " '000000138115',\n",
+       " '000000114871',\n",
+       " '000000374369',\n",
+       " '000000123213',\n",
+       " '000000123321',\n",
+       " '000000015278',\n",
+       " '000000357742',\n",
+       " '000000439854',\n",
+       " '000000465836',\n",
+       " '000000414385',\n",
+       " '000000131556',\n",
+       " '000000322724',\n",
+       " '000000320664',\n",
+       " '000000481390',\n",
+       " '000000109916',\n",
+       " '000000276434',\n",
+       " '000000579635',\n",
+       " '000000295316',\n",
+       " '000000571313',\n",
+       " '000000183127',\n",
+       " '000000115898',\n",
+       " '000000146358',\n",
+       " '000000329542',\n",
+       " '000000189752',\n",
+       " '000000290163',\n",
+       " '000000091406',\n",
+       " '000000322352',\n",
+       " '000000223959',\n",
+       " '000000326248',\n",
+       " '000000218439',\n",
+       " '000000453722',\n",
+       " '000000293625',\n",
+       " '000000411817',\n",
+       " '000000546964',\n",
+       " '000000215259',\n",
+       " '000000573094',\n",
+       " '000000560011',\n",
+       " '000000038576',\n",
+       " '000000147729',\n",
+       " '000000579307',\n",
+       " '000000154425',\n",
+       " '000000432898',\n",
+       " '000000404923',\n",
+       " '000000130586',\n",
+       " '000000163057',\n",
+       " '000000007511',\n",
+       " '000000067406',\n",
+       " '000000290179',\n",
+       " '000000248752',\n",
+       " '000000054593',\n",
+       " '000000116208',\n",
+       " '000000340697',\n",
+       " '000000450303',\n",
+       " '000000494427',\n",
+       " '000000137294',\n",
+       " '000000410880',\n",
+       " '000000311180',\n",
+       " '000000091654',\n",
+       " '000000181796',\n",
+       " '000000002431',\n",
+       " '000000349184',\n",
+       " '000000298396',\n",
+       " '000000472046',\n",
+       " '000000074058',\n",
+       " '000000058029',\n",
+       " '000000134096',\n",
+       " '000000111951',\n",
+       " '000000103585',\n",
+       " '000000210273',\n",
+       " '000000352584',\n",
+       " '000000446651',\n",
+       " '000000194875',\n",
+       " '000000052017',\n",
+       " '000000336309',\n",
+       " '000000227478',\n",
+       " '000000339870',\n",
+       " '000000080666',\n",
+       " '000000033707',\n",
+       " '000000327601',\n",
+       " '000000255749',\n",
+       " '000000008762',\n",
+       " '000000526392',\n",
+       " '000000535578',\n",
+       " '000000580757',\n",
+       " '000000165039',\n",
+       " '000000148719',\n",
+       " '000000108440',\n",
+       " '000000489842',\n",
+       " '000000579818',\n",
+       " '000000423229',\n",
+       " '000000323828',\n",
+       " '000000166287',\n",
+       " '000000101420',\n",
+       " '000000334555',\n",
+       " '000000196759',\n",
+       " '000000411665',\n",
+       " '000000061418',\n",
+       " '000000526751',\n",
+       " '000000024021',\n",
+       " '000000277020',\n",
+       " '000000047828',\n",
+       " '000000183716',\n",
+       " '000000271997',\n",
+       " '000000008532',\n",
+       " '000000094336',\n",
+       " '000000390555',\n",
+       " '000000250282',\n",
+       " '000000068409',\n",
+       " '000000002299',\n",
+       " '000000011051',\n",
+       " '000000066038',\n",
+       " '000000360960',\n",
+       " '000000360097',\n",
+       " '000000421455',\n",
+       " '000000504589',\n",
+       " '000000464522',\n",
+       " '000000454750',\n",
+       " '000000509735',\n",
+       " '000000023034',\n",
+       " '000000141671',\n",
+       " '000000506656',\n",
+       " '000000272566',\n",
+       " '000000045728',\n",
+       " '000000424551',\n",
+       " '000000341719',\n",
+       " '000000072795',\n",
+       " '000000078959',\n",
+       " '000000417285',\n",
+       " '000000002157',\n",
+       " '000000043816',\n",
+       " '000000455555',\n",
+       " '000000535306',\n",
+       " '000000030504',\n",
+       " '000000093353',\n",
+       " '000000530052',\n",
+       " '000000473118',\n",
+       " '000000091779',\n",
+       " '000000283113',\n",
+       " '000000226130',\n",
+       " '000000097278',\n",
+       " '000000567640',\n",
+       " '000000532493',\n",
+       " '000000045550',\n",
+       " '000000156643',\n",
+       " '000000430056',\n",
+       " '000000410456',\n",
+       " '000000441286',\n",
+       " '000000279541',\n",
+       " '000000000885',\n",
+       " '000000378284',\n",
+       " '000000156076',\n",
+       " '000000143572',\n",
+       " '000000229849',\n",
+       " '000000039551',\n",
+       " '000000056344',\n",
+       " '000000193348',\n",
+       " '000000016958',\n",
+       " '000000572678',\n",
+       " '000000106235',\n",
+       " '000000341681',\n",
+       " '000000083172',\n",
+       " '000000343524',\n",
+       " '000000395801',\n",
+       " '000000388056',\n",
+       " '000000259690',\n",
+       " '000000235836',\n",
+       " '000000343218',\n",
+       " '000000205105',\n",
+       " '000000513283',\n",
+       " '000000176446',\n",
+       " '000000371677',\n",
+       " '000000308531',\n",
+       " '000000497599',\n",
+       " '000000455352',\n",
+       " '000000236914',\n",
+       " '000000232684',\n",
+       " '000000415238',\n",
+       " '000000290843',\n",
+       " '000000519522',\n",
+       " '000000144784',\n",
+       " '000000167486',\n",
+       " '000000392228',\n",
+       " '000000488673',\n",
+       " '000000191013',\n",
+       " '000000080057',\n",
+       " '000000570169',\n",
+       " '000000224807',\n",
+       " '000000163562',\n",
+       " '000000136355',\n",
+       " '000000492362',\n",
+       " '000000102707',\n",
+       " '000000232563',\n",
+       " '000000010977',\n",
+       " '000000051598',\n",
+       " '000000032285',\n",
+       " '000000520910',\n",
+       " '000000131273',\n",
+       " '000000206411',\n",
+       " '000000472375',\n",
+       " '000000481404',\n",
+       " '000000471991',\n",
+       " '000000017436',\n",
+       " '000000177934',\n",
+       " '000000165518',\n",
+       " '000000571718',\n",
+       " '000000459467',\n",
+       " '000000135673',\n",
+       " '000000134886',\n",
+       " '000000485895',\n",
+       " '000000287545',\n",
+       " '000000577182',\n",
+       " '000000289222',\n",
+       " '000000372819',\n",
+       " '000000310072',\n",
+       " '000000087144',\n",
+       " '000000430875',\n",
+       " '000000060347',\n",
+       " '000000042070',\n",
+       " '000000420916',\n",
+       " '000000453584',\n",
+       " '000000296224',\n",
+       " '000000122606',\n",
+       " '000000311909',\n",
+       " '000000579893',\n",
+       " '000000284296',\n",
+       " '000000221017',\n",
+       " '000000315001',\n",
+       " '000000439715',\n",
+       " '000000284991',\n",
+       " '000000389566',\n",
+       " '000000078843',\n",
+       " '000000122927',\n",
+       " '000000225532',\n",
+       " '000000013659',\n",
+       " '000000153568',\n",
+       " '000000395633',\n",
+       " '000000419096',\n",
+       " '000000203488',\n",
+       " '000000361268',\n",
+       " '000000466125',\n",
+       " '000000414795',\n",
+       " '000000508101',\n",
+       " '000000253386',\n",
+       " '000000222991',\n",
+       " '000000530854',\n",
+       " '000000351810',\n",
+       " '000000338624',\n",
+       " '000000138492',\n",
+       " '000000263463',\n",
+       " '000000226592',\n",
+       " '000000378454',\n",
+       " '000000020059',\n",
+       " '000000227686',\n",
+       " '000000476215',\n",
+       " '000000297698',\n",
+       " '000000247917',\n",
+       " '000000439522',\n",
+       " '000000479448',\n",
+       " '000000424721',\n",
+       " '000000026690',\n",
+       " '000000558854',\n",
+       " '000000176901',\n",
+       " '000000334767',\n",
+       " '000000301563',\n",
+       " '000000086755',\n",
+       " '000000194471',\n",
+       " '000000420281',\n",
+       " '000000533206',\n",
+       " '000000099810',\n",
+       " '000000334483',\n",
+       " '000000089670',\n",
+       " '000000482275',\n",
+       " '000000404805',\n",
+       " '000000002261',\n",
+       " '000000425702',\n",
+       " '000000036844',\n",
+       " '000000012576',\n",
+       " '000000361238',\n",
+       " '000000108253',\n",
+       " '000000319935',\n",
+       " '000000003934',\n",
+       " '000000029596',\n",
+       " '000000047740',\n",
+       " '000000077460',\n",
+       " '000000014439',\n",
+       " '000000571893',\n",
+       " '000000447314',\n",
+       " '000000181303',\n",
+       " '000000058350',\n",
+       " '000000026465',\n",
+       " '000000246968',\n",
+       " '000000536947',\n",
+       " '000000076731',\n",
+       " '000000286182',\n",
+       " '000000433980',\n",
+       " '000000561366',\n",
+       " '000000380913',\n",
+       " '000000032887',\n",
+       " '000000517687',\n",
+       " '000000213035',\n",
+       " '000000399205',\n",
+       " '000000349837',\n",
+       " '000000350002',\n",
+       " '000000131431',\n",
+       " '000000356248',\n",
+       " '000000334399',\n",
+       " '000000057150',\n",
+       " '000000363666',\n",
+       " '000000507235',\n",
+       " '000000169996',\n",
+       " '000000226417',\n",
+       " '000000481573',\n",
+       " '000000056127',\n",
+       " '000000123480',\n",
+       " '000000274687',\n",
+       " '000000164637',\n",
+       " '000000178028',\n",
+       " '000000493286',\n",
+       " '000000348216',\n",
+       " '000000345027',\n",
+       " '000000571804',\n",
+       " '000000140658',\n",
+       " '000000102644',\n",
+       " '000000581615',\n",
+       " '000000279887',\n",
+       " '000000230008',\n",
+       " '000000284698',\n",
+       " '000000102356',\n",
+       " '000000456394',\n",
+       " '000000323709',\n",
+       " '000000452122',\n",
+       " '000000579158',\n",
+       " '000000525322',\n",
+       " '000000033114',\n",
+       " '000000008690',\n",
+       " '000000381639',\n",
+       " '000000217614',\n",
+       " '000000284445',\n",
+       " '000000468124',\n",
+       " '000000187144',\n",
+       " '000000273198',\n",
+       " '000000095843',\n",
+       " '000000417779',\n",
+       " '000000447342',\n",
+       " '000000166563',\n",
+       " '000000490125',\n",
+       " '000000561009',\n",
+       " '000000183675',\n",
+       " '000000290248',\n",
+       " '000000532058',\n",
+       " '000000214200',\n",
+       " '000000578093',\n",
+       " '000000369751',\n",
+       " '000000429011',\n",
+       " '000000301061',\n",
+       " '000000105264',\n",
+       " '000000267434',\n",
+       " '000000370711',\n",
+       " '000000025393',\n",
+       " '000000471087',\n",
+       " '000000106757',\n",
+       " '000000183648',\n",
+       " '000000358525',\n",
+       " '000000049269',\n",
+       " '000000079144',\n",
+       " '000000519688',\n",
+       " '000000431727',\n",
+       " '000000130699',\n",
+       " '000000215245',\n",
+       " '000000091921',\n",
+       " '000000218424',\n",
+       " '000000473974',\n",
+       " '000000405249',\n",
+       " '000000235784',\n",
+       " '000000521540',\n",
+       " '000000537506',\n",
+       " '000000119445',\n",
+       " '000000507015',\n",
+       " '000000173830',\n",
+       " '000000356498',\n",
+       " '000000435081',\n",
+       " '000000018575',\n",
+       " '000000373315',\n",
+       " '000000227765',\n",
+       " '000000013546',\n",
+       " '000000067310',\n",
+       " '000000125936',\n",
+       " '000000389109',\n",
+       " '000000322211',\n",
+       " '000000184384',\n",
+       " '000000426329',\n",
+       " '000000128476',\n",
+       " '000000414034',\n",
+       " '000000450488',\n",
+       " '000000099182',\n",
+       " '000000051738',\n",
+       " '000000099039',\n",
+       " '000000075456',\n",
+       " '000000134882',\n",
+       " '000000442323',\n",
+       " '000000232489',\n",
+       " '000000351823',\n",
+       " '000000065736',\n",
+       " '000000001000',\n",
+       " '000000379842',\n",
+       " '000000013923',\n",
+       " '000000559543',\n",
+       " '000000185890',\n",
+       " '000000357978',\n",
+       " '000000129492',\n",
+       " '000000261097',\n",
+       " '000000410510',\n",
+       " '000000039951',\n",
+       " '000000306700',\n",
+       " '000000146457',\n",
+       " '000000214224',\n",
+       " '000000332845',\n",
+       " '000000255483',\n",
+       " '000000222455',\n",
+       " '000000187271',\n",
+       " '000000462629',\n",
+       " '000000544565',\n",
+       " '000000369771',\n",
+       " '000000035963',\n",
+       " '000000289516',\n",
+       " '000000334309',\n",
+       " '000000452084',\n",
+       " '000000301718',\n",
+       " '000000429598',\n",
+       " '000000165257',\n",
+       " '000000093437',\n",
+       " '000000413552',\n",
+       " '000000062025',\n",
+       " '000000017379',\n",
+       " '000000176778',\n",
+       " '000000104572',\n",
+       " '000000090108',\n",
+       " '000000157124',\n",
+       " '000000089556',\n",
+       " '000000266206',\n",
+       " '000000086220',\n",
+       " '000000508602',\n",
+       " ...]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "rollback_dataset = dataset.transform(\"id_from_image_name\")\n",
+    "get_ids(rollback_dataset, subsets[0])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a1e290fe",
+   "metadata": {},
+   "source": [
+    "### Transform annotation\n",
     "\n",
-    "ids = get_ids(rollback_dataset, subsets[0])\n",
-    "print(dataset)"
+    "For task chaining or merging multiple heterogeneous datasets, we need to redefine the class definitions. Datumaro provides this class redefinition through `remap_labels`, as shown below."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 18,
    "id": "a2515d03",
    "metadata": {},
    "outputs": [
     {
-     "ename": "TypeError",
-     "evalue": "__init__() got an unexpected keyword argument 'regex'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/core/formatters.py:706\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m    699\u001b[0m stream \u001b[39m=\u001b[39m StringIO()\n\u001b[1;32m    700\u001b[0m printer \u001b[39m=\u001b[39m pretty\u001b[39m.\u001b[39mRepresentationPrinter(stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose,\n\u001b[1;32m    701\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_width, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnewline,\n\u001b[1;32m    702\u001b[0m     max_seq_length\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_seq_length,\n\u001b[1;32m    703\u001b[0m     singleton_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msingleton_printers,\n\u001b[1;32m    704\u001b[0m     type_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtype_printers,\n\u001b[1;32m    705\u001b[0m     deferred_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 706\u001b[0m printer\u001b[39m.\u001b[39;49mpretty(obj)\n\u001b[1;32m    707\u001b[0m printer\u001b[39m.\u001b[39mflush()\n\u001b[1;32m    708\u001b[0m \u001b[39mreturn\u001b[39;00m stream\u001b[39m.\u001b[39mgetvalue()\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m    407\u001b[0m                         \u001b[39mreturn\u001b[39;00m meth(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m    408\u001b[0m                 \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mobject\u001b[39m \\\n\u001b[1;32m    409\u001b[0m                         \u001b[39mand\u001b[39;00m callable(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39m__repr__\u001b[39m\u001b[39m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m                     \u001b[39mreturn\u001b[39;00m _repr_pprint(obj, \u001b[39mself\u001b[39;49m, cycle)\n\u001b[1;32m    412\u001b[0m     \u001b[39mreturn\u001b[39;00m _default_pprint(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m    413\u001b[0m \u001b[39mfinally\u001b[39;00m:\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m    776\u001b[0m \u001b[39m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m    777\u001b[0m \u001b[39m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mrepr\u001b[39;49m(obj)\n\u001b[1;32m    779\u001b[0m lines \u001b[39m=\u001b[39m output\u001b[39m.\u001b[39msplitlines()\n\u001b[1;32m    780\u001b[0m \u001b[39mwith\u001b[39;00m p\u001b[39m.\u001b[39mgroup():\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m    843\u001b[0m     separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    844\u001b[0m     \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    846\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    847\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    848\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    849\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    850\u001b[0m         
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    851\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    852\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    853\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    854\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    855\u001b[0m     )\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m    569\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m    571\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m    363\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m         \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m    365\u001b[0m             \u001b[39mpass\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m    368\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m    369\u001b[0m         \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m    370\u001b[0m         \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m         \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m    372\u001b[0m             \u001b[39myield\u001b[39;00m item\n\u001b[1;32m    373\u001b[0m     \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m     transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m    452\u001b[0m     \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m    453\u001b[0m         \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m    454\u001b[0m         \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m    455\u001b[0m         \u001b[39mpass\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.<locals>._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m    400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m     source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m    402\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m    404\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n",
-      "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'"
-     ]
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=123287\n",
+       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=122218\n",
+       "\tannotations_count=1018861\n",
+       "subsets\n",
+       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -216,80 +3243,175 @@
     "remap_label_dataset"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "4335c84d",
+   "metadata": {},
+   "source": [
+    "### Split datasets\n",
+    "\n",
+    "In this section, we show how to extract a subset of the imported dataset and split it into multiple subsets. Datumaro provides two kinds of splitters: a per-sample random splitter that divides items according to the given subset ratios, and a task-specific splitter that takes annotation instances into account.\n",
+    "\n",
+    "We first extract the validation subset and then split it into multiple cross-validation subsets."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 51,
-   "id": "d0bcd69e",
+   "execution_count": 20,
+   "id": "96a8e001",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataset\n",
-      "\tsize=123287\n",
-      "\tsource_path=coco_dataset\n",
-      "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
-      "\tannotated_items_count=122218\n",
-      "\tannotations_count=1018861\n",
-      "subsets\n",
-      "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
-      "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
-      "categories\n",
-      "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
-      "\n"
-     ]
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=5000\n",
+       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=4952\n",
+       "\tannotations_count=41866\n",
+       "subsets\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "print(dataset)"
+    "from datumaro.components.dataset import Dataset\n",
+    "\n",
+    "val_dataset = dataset.filter('/item[subset=\"val2017\"]') # or Dataset(dataset.get_subset(subsets[0]))\n",
+    "val_dataset"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
-   "id": "96a8e001",
+   "execution_count": 21,
+   "id": "97d25c76",
    "metadata": {},
    "outputs": [
     {
-     "ename": "TypeError",
-     "evalue": "__init__() got an unexpected keyword argument 'regex'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn [45], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m strr \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m|\u001b[39m\u001b[39m\\1\u001b[39;00m\u001b[39m|^image_|\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m      2\u001b[0m renamed_dataset \u001b[39m=\u001b[39m dataset\u001b[39m.\u001b[39mtransform(\u001b[39m\"\u001b[39m\u001b[39mrename\u001b[39m\u001b[39m\"\u001b[39m, regex\u001b[39m=\u001b[39mstrr)\n\u001b[0;32m----> 3\u001b[0m \u001b[39mprint\u001b[39;49m(renamed_dataset)\n\u001b[1;32m      5\u001b[0m \u001b[39m# ids = get_ids(dataset, subsets[0])\u001b[39;00m\n\u001b[1;32m      6\u001b[0m \u001b[39m# print('val2017', ids)\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m    843\u001b[0m     separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    844\u001b[0m     \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    846\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    847\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    848\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    849\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    850\u001b[0m         
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    851\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    852\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    853\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    854\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m    855\u001b[0m     )\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m    569\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m         \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m    571\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m    363\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m         \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m    365\u001b[0m             \u001b[39mpass\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m    368\u001b[0m     \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m    369\u001b[0m         \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m    370\u001b[0m         \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m         \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m    372\u001b[0m             \u001b[39myield\u001b[39;00m item\n\u001b[1;32m    373\u001b[0m     \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m    450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m     transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m    452\u001b[0m     \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m    453\u001b[0m         \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m    454\u001b[0m         \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m    455\u001b[0m         \u001b[39mpass\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.<locals>._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m    399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m    400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m     source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m    402\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m    404\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n",
-      "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'"
-     ]
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=5000\n",
+       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=4952\n",
+       "\tannotations_count=41866\n",
+       "subsets\n",
+       "\tval1: # of items=1000, # of annotated items=993, # of annotations=8365, annotation types=['polygon', 'mask']\n",
+       "\tval2: # of items=1000, # of annotated items=990, # of annotations=8161, annotation types=['polygon', 'mask']\n",
+       "\tval3: # of items=1000, # of annotated items=989, # of annotations=8390, annotation types=['polygon', 'mask']\n",
+       "\tval4: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n",
+       "\tval5: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "# strr = '|\\1|^image_|'\n",
-    "# renamed_dataset = dataset.transform(\"rename\", regex=strr)\n",
-    "# print(renamed_dataset)\n",
-    "\n",
-    "# ids = get_ids(dataset, subsets[0])\n",
-    "# print('val2017', ids)"
+    "splits = ((\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2))\n",
+    "crossval_dataset = val_dataset.transform(\"random_split\", splits=splits)\n",
+    "crossval_dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "43e95f07",
+   "metadata": {},
+   "source": [
+    "Furthermore, Datumaro provides a split function that works at the annotation level rather than the sample level. By running the code below, we get validation subsets that are well balanced in terms of the number of annotations."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "4ed4a847",
+   "execution_count": 22,
+   "id": "f2cee2b8",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=5000\n",
+       "\tsource_path=None\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=4952\n",
+       "\tannotations_count=41866\n",
+       "subsets\n",
+       "\tval1: # of items=1035, # of annotated items=1035, # of annotations=8374, annotation types=['polygon', 'mask']\n",
+       "\tval2: # of items=1000, # of annotated items=997, # of annotations=8376, annotation types=['polygon', 'mask']\n",
+       "\tval3: # of items=1000, # of annotated items=959, # of annotations=8366, annotation types=['polygon', 'mask']\n",
+       "\tval4: # of items=927, # of annotated items=923, # of annotations=8376, annotation types=['polygon', 'mask']\n",
+       "\tval5: # of items=1038, # of annotated items=1038, # of annotations=8374, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "from datumaro.components.visualizer import Visualizer\n",
+    "import datumaro.plugins.splitter as splitter\n",
+    "\n",
+    "task = splitter.SplitTask.segmentation.name\n",
+    "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n",
     "\n",
-    "visualizer = Visualizer(dataset, figsize=(8, 8), alpha=0.7)\n",
-    "fig = visualizer.vis_gallery(ids[:4], subsets[0], (2, 2))\n",
-    "fig.show()"
+    "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n",
+    "crossval_per_ann_dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "274ff8ed",
+   "metadata": {},
+   "source": [
+    "Lastly, we can rename the subsets as shown below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "f5acac08",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=5000\n",
+       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=4952\n",
+       "\tannotations_count=41866\n",
+       "subsets\n",
+       "\ttest: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n",
+       "\ttrain: # of items=3000, # of annotated items=2972, # of annotations=24916, annotation types=['polygon', 'mask']\n",
+       "\tval: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mapping={\"val1\": \"train\", \"val2\": \"train\", \"val3\": \"train\", \"val4\": \"val\", \"val5\": \"test\"}\n",
+    "test_dataset = dataset.transform(\"map_subsets\", mapping=mapping)\n",
+    "test_dataset"
    ]
   }
  ],

From c35986cfe7a654569a492d4dcde80cd5b82fb903 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Fri, 11 Nov 2022 00:31:33 +0900
Subject: [PATCH 3/8] update changelog

---
 CHANGELOG.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 75a708f225..a5feafcbb9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Support for exclusive of labels with LabelGroup
   (<https://github.com/openvinotoolkit/datumaro/pull/742>)
-- Add jupyter sample introducing how to merge datasets
+- Add jupyter samples 
+  - introducing how to merge datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/738>)
+  - introducing how to visualize dataset
+  (<https://github.com/openvinotoolkit/datumaro/pull/747>)
   - introducing how to filter dataset
   (<https://github.com/openvinotoolkit/datumaro/pull/748>)
+  - introducing how to transform dataset
+  (<https://github.com/openvinotoolkit/datumaro/pull/759>)
 - Add Visualization Python API
   - Bbox
     (<https://github.com/openvinotoolkit/datumaro/pull/744>)

From 0d3a4dc2cacd4a79473912af792aae1dce99e58b Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Fri, 11 Nov 2022 00:36:38 +0900
Subject: [PATCH 4/8] update notebook

---
 notebooks/05_transform.ipynb | 84 ++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
index a0d465d8ab..6fae04bbda 100644
--- a/notebooks/05_transform.ipynb
+++ b/notebooks/05_transform.ipynb
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 1,
    "id": "da198c67",
    "metadata": {},
    "outputs": [
@@ -37,12 +37,12 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
+      "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
      ]
     },
     {
@@ -57,7 +57,7 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=123287\n",
-       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=122218\n",
        "\tannotations_count=1018861\n",
@@ -68,7 +68,7 @@
        "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -99,7 +99,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 2,
    "id": "d38cfc9b",
    "metadata": {
     "scrolled": true
@@ -1118,7 +1118,7 @@
        " ...]"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1148,7 +1148,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 3,
    "id": "51bf3388",
    "metadata": {},
    "outputs": [
@@ -2158,7 +2158,7 @@
        " ...]"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2178,7 +2178,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 4,
    "id": "fb608396",
    "metadata": {
     "scrolled": true
@@ -3190,7 +3190,7 @@
        " ...]"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3212,7 +3212,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 5,
    "id": "a2515d03",
    "metadata": {},
    "outputs": [
@@ -3221,7 +3221,7 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=123287\n",
-       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=122218\n",
        "\tannotations_count=1018861\n",
@@ -3232,7 +3232,7 @@
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3257,7 +3257,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 6,
    "id": "96a8e001",
    "metadata": {},
    "outputs": [
@@ -3266,7 +3266,7 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=5000\n",
-       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
@@ -3276,7 +3276,7 @@
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3290,7 +3290,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 7,
    "id": "97d25c76",
    "metadata": {},
    "outputs": [
@@ -3299,21 +3299,21 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=5000\n",
-       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\tval1: # of items=1000, # of annotated items=993, # of annotations=8365, annotation types=['polygon', 'mask']\n",
-       "\tval2: # of items=1000, # of annotated items=990, # of annotations=8161, annotation types=['polygon', 'mask']\n",
-       "\tval3: # of items=1000, # of annotated items=989, # of annotations=8390, annotation types=['polygon', 'mask']\n",
-       "\tval4: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n",
-       "\tval5: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n",
+       "\tval1: # of items=1000, # of annotated items=993, # of annotations=8237, annotation types=['polygon', 'mask']\n",
+       "\tval2: # of items=1000, # of annotated items=989, # of annotations=8542, annotation types=['polygon', 'mask']\n",
+       "\tval3: # of items=1000, # of annotated items=986, # of annotations=8237, annotation types=['polygon', 'mask']\n",
+       "\tval4: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n",
+       "\tval5: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3334,7 +3334,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 8,
    "id": "f2cee2b8",
    "metadata": {},
    "outputs": [
@@ -3348,16 +3348,16 @@
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\tval1: # of items=1035, # of annotated items=1035, # of annotations=8374, annotation types=['polygon', 'mask']\n",
-       "\tval2: # of items=1000, # of annotated items=997, # of annotations=8376, annotation types=['polygon', 'mask']\n",
-       "\tval3: # of items=1000, # of annotated items=959, # of annotations=8366, annotation types=['polygon', 'mask']\n",
-       "\tval4: # of items=927, # of annotated items=923, # of annotations=8376, annotation types=['polygon', 'mask']\n",
-       "\tval5: # of items=1038, # of annotated items=1038, # of annotations=8374, annotation types=['polygon', 'mask']\n",
+       "\tval1: # of items=1029, # of annotated items=1029, # of annotations=8381, annotation types=['polygon', 'mask']\n",
+       "\tval2: # of items=1000, # of annotated items=975, # of annotations=8374, annotation types=['polygon', 'mask']\n",
+       "\tval3: # of items=1009, # of annotated items=1009, # of annotations=8376, annotation types=['polygon', 'mask']\n",
+       "\tval4: # of items=966, # of annotated items=943, # of annotations=8374, annotation types=['polygon', 'mask']\n",
+       "\tval5: # of items=996, # of annotated items=996, # of annotations=8361, annotation types=['polygon', 'mask']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3382,7 +3382,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 9,
    "id": "f5acac08",
    "metadata": {},
    "outputs": [
@@ -3391,19 +3391,19 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=5000\n",
-       "\tsource_path=/home/wonju/data/datasets/coco_dataset\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\ttest: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n",
-       "\ttrain: # of items=3000, # of annotated items=2972, # of annotations=24916, annotation types=['polygon', 'mask']\n",
-       "\tval: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n",
+       "\ttest: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n",
+       "\ttrain: # of items=3000, # of annotated items=2968, # of annotations=25016, annotation types=['polygon', 'mask']\n",
+       "\tval: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }

From f01b55fd2e462c5b6aca3e76c2075974c0350ef8 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Fri, 11 Nov 2022 23:12:15 +0900
Subject: [PATCH 5/8] pylint resolve

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5feafcbb9..69e3b48672 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Support for exclusive of labels with LabelGroup
   (<https://github.com/openvinotoolkit/datumaro/pull/742>)
-- Add jupyter samples 
+- Add jupyter samples
   - introducing how to merge datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/738>)
   - introducing how to visualize dataset

From f958bdfe89f2e107bcfff9eb44989fa94f69b925 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Tue, 15 Nov 2022 18:40:38 +0900
Subject: [PATCH 6/8] correct a notebook for transform api

---
 notebooks/05_transform.ipynb | 62 +++++++++++++-----------------------
 1 file changed, 22 insertions(+), 40 deletions(-)

diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
index 6fae04bbda..b3569a9a62 100644
--- a/notebooks/05_transform.ipynb
+++ b/notebooks/05_transform.ipynb
@@ -37,40 +37,22 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
-      "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
+      "2022-11-15 18:27:33.901721: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+      "WARNING:root:Failed to import module 'datumaro.plugins.openvino_plugin.launcher': libpython3.8.so.1.0: cannot open shared object file: No such file or directory\n"
      ]
     },
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Representation for sample COCO dataset\n"
+     "ename": "Exception",
+     "evalue": "Failed to find 'coco' dataset at 'coco_dataset'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mException\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn [1], line 8\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[1;32m      6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatumaro\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdm\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m dataset \u001b[39m=\u001b[39m dm\u001b[39m.\u001b[39;49mDataset\u001b[39m.\u001b[39;49mimport_from(\u001b[39m'\u001b[39;49m\u001b[39mcoco_dataset\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcoco_instances\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m     10\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mRepresentation for sample COCO dataset\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m     11\u001b[0m dataset\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:1238\u001b[0m, in \u001b[0;36mDataset.import_from\u001b[0;34m(cls, path, format, env, progress_reporter, error_policy, **kwargs)\u001b[0m\n\u001b[1;32m   1236\u001b[0m     importer \u001b[39m=\u001b[39m env\u001b[39m.\u001b[39mmake_importer(\u001b[39mformat\u001b[39m)\n\u001b[1;32m   1237\u001b[0m     \u001b[39mwith\u001b[39;00m logging_disabled(log\u001b[39m.\u001b[39mINFO):\n\u001b[0;32m-> 1238\u001b[0m         detected_sources \u001b[39m=\u001b[39m importer(path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m   1239\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mformat\u001b[39m \u001b[39min\u001b[39;00m env\u001b[39m.\u001b[39mextractors:\n\u001b[1;32m   1240\u001b[0m     detected_sources \u001b[39m=\u001b[39m [{\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: path, \u001b[39m\"\u001b[39m\u001b[39mformat\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mformat\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39moptions\u001b[39m\u001b[39m\"\u001b[39m: kwargs}]\n",
+      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/plugins/coco_format/importer.py:68\u001b[0m, in \u001b[0;36mCocoImporter.__call__\u001b[0;34m(self, path, **extra_params)\u001b[0m\n\u001b[1;32m     65\u001b[0m subsets \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfind_sources(path)\n\u001b[1;32m     67\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(subsets) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m---> 68\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mException\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mFailed to find \u001b[39m\u001b[39m'\u001b[39m\u001b[39mcoco\u001b[39m\u001b[39m'\u001b[39m\u001b[39m dataset at \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m path)\n\u001b[1;32m     70\u001b[0m \u001b[39m# TODO: should be removed when proper label merging is implemented\u001b[39;00m\n\u001b[1;32m     71\u001b[0m conflicting_types \u001b[39m=\u001b[39m {\n\u001b[1;32m     72\u001b[0m     CocoTask\u001b[39m.\u001b[39minstances,\n\u001b[1;32m     73\u001b[0m     CocoTask\u001b[39m.\u001b[39mperson_keypoints,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     76\u001b[0m     CocoTask\u001b[39m.\u001b[39mstuff,\n\u001b[1;32m     77\u001b[0m }\n",
+      "\u001b[0;31mException\u001b[0m: Failed to find 'coco' dataset at 'coco_dataset'"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Dataset\n",
-       "\tsize=123287\n",
-       "\tsource_path=coco_dataset\n",
-       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
-       "\tannotated_items_count=122218\n",
-       "\tannotations_count=1018861\n",
-       "subsets\n",
-       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
-       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
-       "categories\n",
-       "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -78,7 +60,6 @@
     "#\n",
     "# SPDX-License-Identifier: MIT\n",
     "\n",
-    "import os\n",
     "import datumaro as dm\n",
     "\n",
     "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n",
@@ -99,7 +80,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "d38cfc9b",
    "metadata": {
     "scrolled": true
@@ -1148,7 +1129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "51bf3388",
    "metadata": {},
    "outputs": [
@@ -2178,7 +2159,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "fb608396",
    "metadata": {
     "scrolled": true
@@ -3212,7 +3193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "a2515d03",
    "metadata": {},
    "outputs": [
@@ -3257,7 +3238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "96a8e001",
    "metadata": {},
    "outputs": [
@@ -3290,7 +3271,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "97d25c76",
    "metadata": {},
    "outputs": [
@@ -3329,12 +3310,12 @@
    "id": "43e95f07",
    "metadata": {},
    "source": [
-    "Furthermore, Datumaro provides the split function in the viewpoint of annotation instead of sample throguh. By performing below, we can get the well-distributed validation datasets in terms of the number of annotations."
+    "Furthermore, Datumaro can split a dataset from the viewpoint of annotations rather than samples. By running the cell below, we get validation subsets that are well balanced in terms of the number of annotations."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "f2cee2b8",
    "metadata": {},
    "outputs": [
@@ -3368,6 +3349,7 @@
     "task = splitter.SplitTask.segmentation.name\n",
     "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n",
     "\n",
+    "crossval_per_ann_dataset = val_dataset.transform(\"split\", task=task, splits=splits)\n",
     "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n",
     "crossval_per_ann_dataset"
    ]
@@ -3382,7 +3364,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "id": "f5acac08",
    "metadata": {},
    "outputs": [

From 5e58ea450eabdf3f1fb1be3ddbcce40b6b2880af Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Tue, 15 Nov 2022 18:42:21 +0900
Subject: [PATCH 7/8] correct a notebook for transform api

---
 notebooks/05_transform.ipynb | 111 ++++++++++++++++++++---------------
 1 file changed, 64 insertions(+), 47 deletions(-)

diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
index b3569a9a62..fc6fdd9f72 100644
--- a/notebooks/05_transform.ipynb
+++ b/notebooks/05_transform.ipynb
@@ -29,7 +29,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "id": "da198c67",
    "metadata": {},
    "outputs": [
@@ -37,22 +37,40 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2022-11-15 18:27:33.901721: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
-      "WARNING:root:Failed to import module 'datumaro.plugins.openvino_plugin.launcher': libpython3.8.so.1.0: cannot open shared object file: No such file or directory\n"
+      "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n",
+      "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n"
      ]
     },
     {
-     "ename": "Exception",
-     "evalue": "Failed to find 'coco' dataset at 'coco_dataset'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mException\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn [1], line 8\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[1;32m      6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatumaro\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdm\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m dataset \u001b[39m=\u001b[39m dm\u001b[39m.\u001b[39;49mDataset\u001b[39m.\u001b[39;49mimport_from(\u001b[39m'\u001b[39;49m\u001b[39mcoco_dataset\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcoco_instances\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m     10\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mRepresentation for sample COCO dataset\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m     11\u001b[0m dataset\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:1238\u001b[0m, in \u001b[0;36mDataset.import_from\u001b[0;34m(cls, path, format, env, progress_reporter, error_policy, **kwargs)\u001b[0m\n\u001b[1;32m   1236\u001b[0m     importer \u001b[39m=\u001b[39m env\u001b[39m.\u001b[39mmake_importer(\u001b[39mformat\u001b[39m)\n\u001b[1;32m   1237\u001b[0m     \u001b[39mwith\u001b[39;00m logging_disabled(log\u001b[39m.\u001b[39mINFO):\n\u001b[0;32m-> 1238\u001b[0m         detected_sources \u001b[39m=\u001b[39m importer(path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m   1239\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mformat\u001b[39m \u001b[39min\u001b[39;00m env\u001b[39m.\u001b[39mextractors:\n\u001b[1;32m   1240\u001b[0m     detected_sources \u001b[39m=\u001b[39m [{\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: path, \u001b[39m\"\u001b[39m\u001b[39mformat\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mformat\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39moptions\u001b[39m\u001b[39m\"\u001b[39m: kwargs}]\n",
-      "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/plugins/coco_format/importer.py:68\u001b[0m, in \u001b[0;36mCocoImporter.__call__\u001b[0;34m(self, path, **extra_params)\u001b[0m\n\u001b[1;32m     65\u001b[0m subsets \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfind_sources(path)\n\u001b[1;32m     67\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(subsets) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m---> 68\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mException\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mFailed to find \u001b[39m\u001b[39m'\u001b[39m\u001b[39mcoco\u001b[39m\u001b[39m'\u001b[39m\u001b[39m dataset at \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m path)\n\u001b[1;32m     70\u001b[0m \u001b[39m# TODO: should be removed when proper label merging is implemented\u001b[39;00m\n\u001b[1;32m     71\u001b[0m conflicting_types \u001b[39m=\u001b[39m {\n\u001b[1;32m     72\u001b[0m     CocoTask\u001b[39m.\u001b[39minstances,\n\u001b[1;32m     73\u001b[0m     CocoTask\u001b[39m.\u001b[39mperson_keypoints,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     76\u001b[0m     CocoTask\u001b[39m.\u001b[39mstuff,\n\u001b[1;32m     77\u001b[0m }\n",
-      "\u001b[0;31mException\u001b[0m: Failed to find 'coco' dataset at 'coco_dataset'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Representation for sample COCO dataset\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Dataset\n",
+       "\tsize=123287\n",
+       "\tsource_path=coco_dataset\n",
+       "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
+       "\tannotated_items_count=122218\n",
+       "\tannotations_count=1018861\n",
+       "subsets\n",
+       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['mask', 'polygon']\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n",
+       "categories\n",
+       "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -80,7 +98,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "id": "d38cfc9b",
    "metadata": {
     "scrolled": true
@@ -1099,7 +1117,7 @@
        " ...]"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1129,7 +1147,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "51bf3388",
    "metadata": {},
    "outputs": [
@@ -2139,7 +2157,7 @@
        " ...]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2159,7 +2177,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "id": "fb608396",
    "metadata": {
     "scrolled": true
@@ -3171,7 +3189,7 @@
        " ...]"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3193,7 +3211,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "id": "a2515d03",
    "metadata": {},
    "outputs": [
@@ -3207,13 +3225,13 @@
        "\tannotated_items_count=122218\n",
        "\tannotations_count=1018861\n",
        "subsets\n",
-       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n",
-       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+       "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['mask', 'polygon']\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3238,7 +3256,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "96a8e001",
    "metadata": {},
    "outputs": [
@@ -3252,12 +3270,12 @@
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n",
+       "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3271,7 +3289,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "id": "97d25c76",
    "metadata": {},
    "outputs": [
@@ -3285,16 +3303,16 @@
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\tval1: # of items=1000, # of annotated items=993, # of annotations=8237, annotation types=['polygon', 'mask']\n",
-       "\tval2: # of items=1000, # of annotated items=989, # of annotations=8542, annotation types=['polygon', 'mask']\n",
-       "\tval3: # of items=1000, # of annotated items=986, # of annotations=8237, annotation types=['polygon', 'mask']\n",
-       "\tval4: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n",
-       "\tval5: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n",
+       "\tval1: # of items=1000, # of annotated items=991, # of annotations=8344, annotation types=['mask', 'polygon']\n",
+       "\tval2: # of items=1000, # of annotated items=991, # of annotations=7646, annotation types=['mask', 'polygon']\n",
+       "\tval3: # of items=1000, # of annotated items=993, # of annotations=8625, annotation types=['mask', 'polygon']\n",
+       "\tval4: # of items=1000, # of annotated items=986, # of annotations=8752, annotation types=['mask', 'polygon']\n",
+       "\tval5: # of items=1000, # of annotated items=991, # of annotations=8499, annotation types=['mask', 'polygon']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3315,7 +3333,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "id": "f2cee2b8",
    "metadata": {},
    "outputs": [
@@ -3324,21 +3342,21 @@
       "text/plain": [
        "Dataset\n",
        "\tsize=5000\n",
-       "\tsource_path=None\n",
+       "\tsource_path=coco_dataset\n",
        "\tmedia_type=<class 'datumaro.components.media.Image'>\n",
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\tval1: # of items=1029, # of annotated items=1029, # of annotations=8381, annotation types=['polygon', 'mask']\n",
-       "\tval2: # of items=1000, # of annotated items=975, # of annotations=8374, annotation types=['polygon', 'mask']\n",
-       "\tval3: # of items=1009, # of annotated items=1009, # of annotations=8376, annotation types=['polygon', 'mask']\n",
-       "\tval4: # of items=966, # of annotated items=943, # of annotations=8374, annotation types=['polygon', 'mask']\n",
-       "\tval5: # of items=996, # of annotated items=996, # of annotations=8361, annotation types=['polygon', 'mask']\n",
+       "\tval1: # of items=1000, # of annotated items=1000, # of annotations=8368, annotation types=['mask', 'polygon']\n",
+       "\tval2: # of items=967, # of annotated items=919, # of annotations=8374, annotation types=['mask', 'polygon']\n",
+       "\tval3: # of items=1032, # of annotated items=1032, # of annotations=8374, annotation types=['mask', 'polygon']\n",
+       "\tval4: # of items=987, # of annotated items=987, # of annotations=8376, annotation types=['mask', 'polygon']\n",
+       "\tval5: # of items=1014, # of annotated items=1014, # of annotations=8374, annotation types=['mask', 'polygon']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3350,7 +3368,6 @@
     "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n",
     "\n",
     "crossval_per_ann_dataset = val_dataset.transform(\"split\", task=task, splits=splits)\n",
-    "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n",
     "crossval_per_ann_dataset"
    ]
   },
@@ -3364,7 +3381,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "id": "f5acac08",
    "metadata": {},
    "outputs": [
@@ -3378,14 +3395,14 @@
        "\tannotated_items_count=4952\n",
        "\tannotations_count=41866\n",
        "subsets\n",
-       "\ttest: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n",
-       "\ttrain: # of items=3000, # of annotated items=2968, # of annotations=25016, annotation types=['polygon', 'mask']\n",
-       "\tval: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n",
+       "\ttest: # of items=1014, # of annotated items=1014, # of annotations=8374, annotation types=['mask', 'polygon']\n",
+       "\ttrain: # of items=2999, # of annotated items=2951, # of annotations=25116, annotation types=['mask', 'polygon']\n",
+       "\tval: # of items=987, # of annotated items=987, # of annotations=8376, annotation types=['mask', 'polygon']\n",
        "categories\n",
        "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }

From 74560f27195e31c1c05914025c4904d45217ff26 Mon Sep 17 00:00:00 2001
From: Wonju Lee <wonju.lee@intel.com>
Date: Tue, 15 Nov 2022 18:43:56 +0900
Subject: [PATCH 8/8] correct typo

---
 notebooks/05_transform.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb
index fc6fdd9f72..439e2b5854 100644
--- a/notebooks/05_transform.ipynb
+++ b/notebooks/05_transform.ipynb
@@ -3328,7 +3328,7 @@
    "id": "43e95f07",
    "metadata": {},
    "source": [
-    "Furthermore, Datumaro provides the split function in the viewpoint of annotation instead of sample through. By performing below, we can get the well-distributed validation datasets in terms of the number of annotations."
+    "Furthermore, Datumaro can split a dataset from the viewpoint of annotations instead of samples through a task-specific splitter. By running the cell below, we get validation subsets that are well balanced in terms of the number of annotations."
    ]
   },
   {