From 945b5f723f9d412544f19aead67e7301e1a92ed4 Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Mon, 7 Nov 2022 23:46:21 +0900 Subject: [PATCH 1/8] add notebooks for transform --- notebooks/05_transform.ipynb | 322 +++++++++++++++++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 notebooks/05_transform.ipynb diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb new file mode 100644 index 0000000000..8780c8203d --- /dev/null +++ b/notebooks/05_transform.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "652f6b58", + "metadata": {}, + "source": [ + "# Transform datasets" + ] + }, + { + "cell_type": "markdown", + "id": "ca821a19", + "metadata": {}, + "source": [ + "In this notebook example, we'll take a look at Datumaro transform api, where transform provides the task changes by modifying the annotation style, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "da198c67", + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright (C) 2022 Intel Corporation\n", + "#\n", + "# SPDX-License-Identifier: MIT\n", + "\n", + "import os\n", + "import datumaro as dm" + ] + }, + { + "cell_type": "markdown", + "id": "9e2cf885", + "metadata": {}, + "source": [ + "### Filtered by subset" + ] + }, + { + "cell_type": "markdown", + "id": "031f1d62", + "metadata": {}, + "source": [ + "We export sample VOC dataset to filter only train subset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "b9640838", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Representation for sample COCO dataset\n" + ] + }, + { + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=123287\n", + "\tsource_path=coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=122218\n", + "\tannotations_count=1018861\n", + "subsets\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 
'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n", + "\n", + "print('Representation for sample COCO dataset')\n", + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "d38cfc9b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Subset candidates: ['val2017', 'train2017']\n" + ] + } + ], + "source": [ + "subsets = list(dataset.subsets().keys())\n", + "print(\"Subset candidates:\", subsets)\n", + "\n", + "def get_ids(dataset: dm.Dataset, subset: str):\n", + " ids = []\n", + " for item in dataset:\n", + " if item.subset == subset:\n", + " ids += [item.id]\n", + " \n", + " return ids\n", + "\n", + "ids = get_ids(dataset, subsets[0])" + ] + }, + { + "cell_type": "markdown", + "id": "db0e0346", + "metadata": {}, + "source": [ + "In VOC dataset, there are 'train' and 'test' subset. We will filter only 'train' subset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "51bf3388", + "metadata": {}, + "outputs": [], + "source": [ + "# dataset.transform(\"masks_to_polygons\")\n", + "reindexing_dataset = dataset.transform(\"reindex\", start=0)\n", + "\n", + "ids = get_ids(reindexing_dataset, subsets[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "fb608396", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset\n", + "\tsize=123287\n", + "\tsource_path=coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=122218\n", + "\tannotations_count=1018861\n", + "subsets\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", + "\n" + ] + } + ], + "source": [ + "rollback_dataset = dataset.transform(\"id_from_image_name\")\n", + "\n", + "ids = get_ids(rollback_dataset, subsets[0])\n", + 
"print(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "a2515d03", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "__init__() got an unexpected keyword argument 'regex'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/core/formatters.py:706\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 699\u001b[0m stream \u001b[39m=\u001b[39m StringIO()\n\u001b[1;32m 700\u001b[0m printer \u001b[39m=\u001b[39m pretty\u001b[39m.\u001b[39mRepresentationPrinter(stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose,\n\u001b[1;32m 701\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_width, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnewline,\n\u001b[1;32m 702\u001b[0m max_seq_length\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_seq_length,\n\u001b[1;32m 703\u001b[0m singleton_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msingleton_printers,\n\u001b[1;32m 704\u001b[0m type_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtype_printers,\n\u001b[1;32m 705\u001b[0m deferred_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 706\u001b[0m printer\u001b[39m.\u001b[39;49mpretty(obj)\n\u001b[1;32m 707\u001b[0m printer\u001b[39m.\u001b[39mflush()\n\u001b[1;32m 708\u001b[0m \u001b[39mreturn\u001b[39;00m stream\u001b[39m.\u001b[39mgetvalue()\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[39mreturn\u001b[39;00m meth(obj, 
\u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m 408\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mobject\u001b[39m \\\n\u001b[1;32m 409\u001b[0m \u001b[39mand\u001b[39;00m callable(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39m__repr__\u001b[39m\u001b[39m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m \u001b[39mreturn\u001b[39;00m _repr_pprint(obj, \u001b[39mself\u001b[39;49m, cycle)\n\u001b[1;32m 412\u001b[0m \u001b[39mreturn\u001b[39;00m _default_pprint(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m 413\u001b[0m \u001b[39mfinally\u001b[39;00m:\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[39m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m 777\u001b[0m \u001b[39m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output \u001b[39m=\u001b[39m \u001b[39mrepr\u001b[39;49m(obj)\n\u001b[1;32m 779\u001b[0m lines \u001b[39m=\u001b[39m output\u001b[39m.\u001b[39msplitlines()\n\u001b[1;32m 780\u001b[0m \u001b[39mwith\u001b[39;00m p\u001b[39m.\u001b[39mgroup():\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 843\u001b[0m separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 844\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m 
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 846\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 847\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 848\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 849\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 850\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 851\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 852\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 
853\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 854\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 855\u001b[0m )\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m 569\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m 571\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 363\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m 365\u001b[0m \u001b[39mpass\u001b[39;00m\n", + "File 
\u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m 368\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 369\u001b[0m \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m 372\u001b[0m \u001b[39myield\u001b[39;00m item\n\u001b[1;32m 373\u001b[0m \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m 452\u001b[0m \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m 453\u001b[0m \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[39mpass\u001b[39;00m\n", + "File 
\u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m 400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m 402\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m 404\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n", + "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'" + ] + } + ], + "source": [ + "mapping = {\"motorcycle\": \"bicycle\", \"bus\": \"car\", \"truck\": \"car\"}\n", + "remap_label_dataset = dataset.transform(\"remap_labels\", mapping=mapping)\n", + "remap_label_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "d0bcd69e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset\n", + "\tsize=123287\n", + "\tsource_path=coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=122218\n", + "\tannotations_count=1018861\n", + "subsets\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + 
"categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", + "\n" + ] + } + ], + "source": [ + "print(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "96a8e001", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "__init__() got an unexpected keyword argument 'regex'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [45], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m strr \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39m|\u001b[39m\u001b[39m\\1\u001b[39;00m\u001b[39m|^image_|\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 2\u001b[0m renamed_dataset \u001b[39m=\u001b[39m dataset\u001b[39m.\u001b[39mtransform(\u001b[39m\"\u001b[39m\u001b[39mrename\u001b[39m\u001b[39m\"\u001b[39m, regex\u001b[39m=\u001b[39mstrr)\n\u001b[0;32m----> 3\u001b[0m \u001b[39mprint\u001b[39;49m(renamed_dataset)\n\u001b[1;32m 5\u001b[0m \u001b[39m# ids = get_ids(dataset, subsets[0])\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39m# 
print('val2017', ids)\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 843\u001b[0m separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 844\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 846\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 847\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 848\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 849\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 850\u001b[0m 
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 851\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 852\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 853\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 854\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 855\u001b[0m )\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m 569\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m 571\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n", + "File 
\u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 363\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m 365\u001b[0m \u001b[39mpass\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m 368\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 369\u001b[0m \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m 372\u001b[0m \u001b[39myield\u001b[39;00m item\n\u001b[1;32m 373\u001b[0m \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 448\u001b[0m transform \u001b[39m=\u001b[39m 
\u001b[39mNone\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m 452\u001b[0m \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m 453\u001b[0m \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[39mpass\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m 400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m 402\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m 404\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n", + "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'" + ] + } + ], + "source": [ + "# strr = '|\\1|^image_|'\n", + "# renamed_dataset = dataset.transform(\"rename\", regex=strr)\n", + "# print(renamed_dataset)\n", + "\n", + "# ids = 
get_ids(dataset, subsets[0])\n", + "# print('val2017', ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4ed4a847", + "metadata": {}, + "outputs": [], + "source": [ + "from datumaro.components.visualizer import Visualizer\n", + "\n", + "visualizer = Visualizer(dataset, figsize=(8, 8), alpha=0.7)\n", + "fig = visualizer.vis_gallery(ids[:4], subsets[0], (2, 2))\n", + "fig.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.13 ('datum')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "affff79ca1edacbf0919cffebb4fdcbe1cd4dfe1034cbc10ce20b177737f1c41" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 711b41b035cac6c61e397cce0c0b9a64a4da4915 Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Fri, 11 Nov 2022 00:25:37 +0900 Subject: [PATCH 2/8] add nootebook example for transform api --- notebooks/05_transform.ipynb | 3378 ++++++++++++++++++++++++++++++++-- 1 file changed, 3250 insertions(+), 128 deletions(-) diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb index 8780c8203d..a0d465d8ab 100644 --- a/notebooks/05_transform.ipynb +++ b/notebooks/05_transform.ipynb @@ -13,54 +13,36 @@ "id": "ca821a19", "metadata": {}, "source": [ - "In this notebook example, we'll take a look at Datumaro transform api, where transform provides the task changes by modifying the annotation style, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "da198c67", - "metadata": {}, - "outputs": [], - "source": [ - "# Copyright (C) 2022 Intel Corporation\n", - "#\n", - "# SPDX-License-Identifier: MIT\n", - "\n", - "import os\n", - "import datumaro as dm" + "In this notebook example, we will take a look at Datumaro transform api, where transform provides splitting and merging subsets, redefining annotation information, reidentifying media, and task-changing with the modification of the annotation format, e.g., from masks to polygons, from bounding boxes to masks, from shapes to bounding boxes, etc." ] }, { "cell_type": "markdown", - "id": "9e2cf885", + "id": "bd9e52cf", "metadata": {}, "source": [ - "### Filtered by subset" - ] - }, - { - "cell_type": "markdown", - "id": "031f1d62", - "metadata": {}, - "source": [ - "We export sample VOC dataset to filter only train subset." + "## Prerequisite\n", + "### Download COCO 2017 validation dataset\n", + "\n", + "Please refer https://github.com/openvinotoolkit/datumaro/blob/develop/notebooks/03_visualize.ipynb for preparing COCO 2017 validation dataset." 
] }, { "cell_type": "code", - "execution_count": 49, - "id": "b9640838", + "execution_count": 14, + "id": "da198c67", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" ] }, { @@ -75,7 +57,7 @@ "text/plain": [ "Dataset\n", "\tsize=123287\n", - 
"\tsource_path=coco_dataset\n", + "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=122218\n", "\tannotations_count=1018861\n", @@ -86,21 +68,38 @@ "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 49, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Copyright (C) 2022 Intel Corporation\n", + "#\n", + "# SPDX-License-Identifier: MIT\n", + "\n", + "import os\n", + "import datumaro as dm\n", + "\n", "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n", "\n", "print('Representation for sample COCO dataset')\n", "dataset" ] }, + { + "cell_type": "markdown", + "id": "50b11dc3", + "metadata": {}, + "source": [ + "### Transform media ID\n", + "\n", + "We first modify the `media_id` through a transformation. The original `media_id` values are shown below." 
+ ] + }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 15, "id": "d38cfc9b", "metadata": { "scrolled": true @@ -112,6 +111,1016 @@ "text": [ "Subset candidates: ['val2017', 'train2017']\n" ] + }, + { + "data": { + "text/plain": [ + "['000000397133',\n", + " '000000037777',\n", + " '000000252219',\n", + " '000000087038',\n", + " '000000174482',\n", + " '000000403385',\n", + " '000000006818',\n", + " '000000480985',\n", + " '000000458054',\n", + " '000000331352',\n", + " '000000296649',\n", + " '000000386912',\n", + " '000000502136',\n", + " '000000491497',\n", + " '000000184791',\n", + " '000000348881',\n", + " '000000289393',\n", + " '000000522713',\n", + " '000000181666',\n", + " '000000017627',\n", + " '000000143931',\n", + " '000000303818',\n", + " '000000463730',\n", + " '000000460347',\n", + " '000000322864',\n", + " '000000226111',\n", + " '000000153299',\n", + " '000000308394',\n", + " '000000456496',\n", + " '000000058636',\n", + " '000000041888',\n", + " '000000184321',\n", + " '000000565778',\n", + " '000000297343',\n", + " '000000336587',\n", + " '000000122745',\n", + " '000000219578',\n", + " '000000555705',\n", + " '000000443303',\n", + " '000000500663',\n", + " '000000418281',\n", + " '000000025560',\n", + " '000000403817',\n", + " '000000085329',\n", + " '000000329323',\n", + " '000000239274',\n", + " '000000286994',\n", + " '000000511321',\n", + " '000000314294',\n", + " '000000233771',\n", + " '000000475779',\n", + " '000000301867',\n", + " '000000312421',\n", + " '000000185250',\n", + " '000000356427',\n", + " '000000572517',\n", + " '000000270244',\n", + " '000000516316',\n", + " '000000125211',\n", + " '000000562121',\n", + " '000000360661',\n", + " '000000016228',\n", + " '000000382088',\n", + " '000000266409',\n", + " '000000430961',\n", + " '000000080671',\n", + " '000000577539',\n", + " '000000104612',\n", + " '000000476258',\n", + " '000000448365',\n", + " '000000035197',\n", + " '000000349860',\n", + " 
'000000180135',\n", + " '000000486438',\n", + " '000000400573',\n", + " '000000109798',\n", + " '000000370677',\n", + " '000000238866',\n", + " '000000369370',\n", + " '000000502737',\n", + " '000000515579',\n", + " '000000515445',\n", + " '000000173383',\n", + " '000000438862',\n", + " '000000180560',\n", + " '000000347693',\n", + " '000000039956',\n", + " '000000321214',\n", + " '000000474028',\n", + " '000000066523',\n", + " '000000355257',\n", + " '000000142092',\n", + " '000000063154',\n", + " '000000199551',\n", + " '000000239347',\n", + " '000000514508',\n", + " '000000473237',\n", + " '000000228144',\n", + " '000000206027',\n", + " '000000078915',\n", + " '000000551215',\n", + " '000000544519',\n", + " '000000096493',\n", + " '000000023899',\n", + " '000000340175',\n", + " '000000578500',\n", + " '000000366141',\n", + " '000000057597',\n", + " '000000559842',\n", + " '000000434230',\n", + " '000000428454',\n", + " '000000399462',\n", + " '000000261061',\n", + " '000000168330',\n", + " '000000383384',\n", + " '000000342006',\n", + " '000000217285',\n", + " '000000236412',\n", + " '000000524456',\n", + " '000000153343',\n", + " '000000095786',\n", + " '000000326541',\n", + " '000000213086',\n", + " '000000231339',\n", + " '000000508730',\n", + " '000000550426',\n", + " '000000368294',\n", + " '000000171190',\n", + " '000000301135',\n", + " '000000580294',\n", + " '000000494869',\n", + " '000000033638',\n", + " '000000329219',\n", + " '000000034873',\n", + " '000000186980',\n", + " '000000127182',\n", + " '000000356387',\n", + " '000000367680',\n", + " '000000263796',\n", + " '000000117425',\n", + " '000000365387',\n", + " '000000487583',\n", + " '000000504711',\n", + " '000000363840',\n", + " '000000214720',\n", + " '000000379453',\n", + " '000000311295',\n", + " '000000029393',\n", + " '000000278848',\n", + " '000000166391',\n", + " '000000048153',\n", + " '000000459153',\n", + " '000000295713',\n", + " '000000223130',\n", + " '000000273132',\n", + " 
'000000198960',\n", + " '000000344059',\n", + " '000000410428',\n", + " '000000087875',\n", + " '000000450758',\n", + " '000000458790',\n", + " '000000460160',\n", + " '000000458109',\n", + " '000000030675',\n", + " '000000566524',\n", + " '000000338428',\n", + " '000000545826',\n", + " '000000166277',\n", + " '000000269314',\n", + " '000000476415',\n", + " '000000292082',\n", + " '000000360137',\n", + " '000000122046',\n", + " '000000352684',\n", + " '000000512836',\n", + " '000000008021',\n", + " '000000107226',\n", + " '000000084477',\n", + " '000000562243',\n", + " '000000181859',\n", + " '000000177015',\n", + " '000000292236',\n", + " '000000121506',\n", + " '000000288042',\n", + " '000000453860',\n", + " '000000500257',\n", + " '000000113403',\n", + " '000000125062',\n", + " '000000375015',\n", + " '000000334719',\n", + " '000000134112',\n", + " '000000283520',\n", + " '000000031269',\n", + " '000000319721',\n", + " '000000165351',\n", + " '000000347265',\n", + " '000000414170',\n", + " '000000231508',\n", + " '000000389381',\n", + " '000000118921',\n", + " '000000021503',\n", + " '000000000785',\n", + " '000000300842',\n", + " '000000105014',\n", + " '000000261982',\n", + " '000000034205',\n", + " '000000099242',\n", + " '000000314709',\n", + " '000000460494',\n", + " '000000339442',\n", + " '000000541055',\n", + " '000000409475',\n", + " '000000464786',\n", + " '000000378605',\n", + " '000000331817',\n", + " '000000218091',\n", + " '000000578545',\n", + " '000000363207',\n", + " '000000372577',\n", + " '000000212166',\n", + " '000000172571',\n", + " '000000294831',\n", + " '000000084431',\n", + " '000000323355',\n", + " '000000355325',\n", + " '000000100582',\n", + " '000000555412',\n", + " '000000004495',\n", + " '000000009483',\n", + " '000000326082',\n", + " '000000398237',\n", + " '000000507223',\n", + " '000000031050',\n", + " '000000239537',\n", + " '000000340930',\n", + " '000000011813',\n", + " '000000281414',\n", + " '000000537991',\n", + " 
'000000284282',\n", + " '000000321333',\n", + " '000000521282',\n", + " '000000108026',\n", + " '000000243204',\n", + " '000000177935',\n", + " '000000038829',\n", + " '000000397327',\n", + " '000000501523',\n", + " '000000555050',\n", + " '000000376442',\n", + " '000000187243',\n", + " '000000356347',\n", + " '000000293044',\n", + " '000000560279',\n", + " '000000042276',\n", + " '000000534827',\n", + " '000000190756',\n", + " '000000482917',\n", + " '000000300659',\n", + " '000000199977',\n", + " '000000442480',\n", + " '000000384350',\n", + " '000000383621',\n", + " '000000189828',\n", + " '000000412894',\n", + " '000000537153',\n", + " '000000361103',\n", + " '000000392722',\n", + " '000000338560',\n", + " '000000264535',\n", + " '000000295231',\n", + " '000000154947',\n", + " '000000212559',\n", + " '000000458755',\n", + " '000000104782',\n", + " '000000315257',\n", + " '000000130599',\n", + " '000000227187',\n", + " '000000151662',\n", + " '000000461275',\n", + " '000000523811',\n", + " '000000456559',\n", + " '000000101068',\n", + " '000000140640',\n", + " '000000516708',\n", + " '000000544605',\n", + " '000000385190',\n", + " '000000338986',\n", + " '000000053994',\n", + " '000000061171',\n", + " '000000314034',\n", + " '000000291490',\n", + " '000000152740',\n", + " '000000024919',\n", + " '000000079837',\n", + " '000000021903',\n", + " '000000564133',\n", + " '000000337055',\n", + " '000000110638',\n", + " '000000034139',\n", + " '000000080340',\n", + " '000000083113',\n", + " '000000173033',\n", + " '000000255664',\n", + " '000000072813',\n", + " '000000545129',\n", + " '000000546011',\n", + " '000000121031',\n", + " '000000172547',\n", + " '000000369081',\n", + " '000000509131',\n", + " '000000578922',\n", + " '000000464089',\n", + " '000000453708',\n", + " '000000177714',\n", + " '000000459887',\n", + " '000000155179',\n", + " '000000261116',\n", + " '000000396274',\n", + " '000000029640',\n", + " '000000141328',\n", + " '000000308430',\n", + " 
'000000043314',\n", + " '000000273715',\n", + " '000000456303',\n", + " '000000406611',\n", + " '000000475064',\n", + " '000000466567',\n", + " '000000137246',\n", + " '000000015079',\n", + " '000000296284',\n", + " '000000226147',\n", + " '000000226903',\n", + " '000000127517',\n", + " '000000162092',\n", + " '000000131379',\n", + " '000000366611',\n", + " '000000263969',\n", + " '000000551439',\n", + " '000000474167',\n", + " '000000159458',\n", + " '000000554735',\n", + " '000000099428',\n", + " '000000386352',\n", + " '000000173004',\n", + " '000000311394',\n", + " '000000578489',\n", + " '000000189310',\n", + " '000000491366',\n", + " '000000448076',\n", + " '000000293804',\n", + " '000000312237',\n", + " '000000221291',\n", + " '000000141821',\n", + " '000000410650',\n", + " '000000199310',\n", + " '000000323151',\n", + " '000000089648',\n", + " '000000219283',\n", + " '000000471869',\n", + " '000000520264',\n", + " '000000111179',\n", + " '000000151000',\n", + " '000000100624',\n", + " '000000332570',\n", + " '000000057238',\n", + " '000000502732',\n", + " '000000135561',\n", + " '000000008277',\n", + " '000000173044',\n", + " '000000168458',\n", + " '000000512194',\n", + " '000000370042',\n", + " '000000189436',\n", + " '000000533958',\n", + " '000000117645',\n", + " '000000221708',\n", + " '000000202228',\n", + " '000000403565',\n", + " '000000211042',\n", + " '000000492878',\n", + " '000000441586',\n", + " '000000547816',\n", + " '000000306733',\n", + " '000000530099',\n", + " '000000312278',\n", + " '000000097679',\n", + " '000000564127',\n", + " '000000251065',\n", + " '000000003845',\n", + " '000000138819',\n", + " '000000205834',\n", + " '000000348708',\n", + " '000000166521',\n", + " '000000485802',\n", + " '000000099054',\n", + " '000000022969',\n", + " '000000570539',\n", + " '000000278353',\n", + " '000000158548',\n", + " '000000461405',\n", + " '000000176606',\n", + " '000000044699',\n", + " '000000559956',\n", + " '000000268996',\n", + " 
'000000011197',\n", + " '000000483667',\n", + " '000000448810',\n", + " '000000000724',\n", + " '000000051961',\n", + " '000000375278',\n", + " '000000302165',\n", + " '000000131131',\n", + " '000000098839',\n", + " '000000402992',\n", + " '000000465675',\n", + " '000000240754',\n", + " '000000021167',\n", + " '000000148730',\n", + " '000000384468',\n", + " '000000253742',\n", + " '000000186873',\n", + " '000000082180',\n", + " '000000446522',\n", + " '000000552902',\n", + " '000000125405',\n", + " '000000110211',\n", + " '000000016010',\n", + " '000000064462',\n", + " '000000314182',\n", + " '000000248980',\n", + " '000000068387',\n", + " '000000429281',\n", + " '000000345466',\n", + " '000000352900',\n", + " '000000118367',\n", + " '000000113235',\n", + " '000000311303',\n", + " '000000163640',\n", + " '000000370999',\n", + " '000000001490',\n", + " '000000329456',\n", + " '000000570471',\n", + " '000000088269',\n", + " '000000260470',\n", + " '000000193494',\n", + " '000000252776',\n", + " '000000201072',\n", + " '000000018150',\n", + " '000000337498',\n", + " '000000521405',\n", + " '000000518770',\n", + " '000000201646',\n", + " '000000036936',\n", + " '000000059044',\n", + " '000000172946',\n", + " '000000234607',\n", + " '000000532690',\n", + " '000000323895',\n", + " '000000384670',\n", + " '000000050326',\n", + " '000000205542',\n", + " '000000217957',\n", + " '000000162035',\n", + " '000000415727',\n", + " '000000046252',\n", + " '000000182021',\n", + " '000000231747',\n", + " '000000090284',\n", + " '000000286553',\n", + " '000000488736',\n", + " '000000063602',\n", + " '000000383386',\n", + " '000000450686',\n", + " '000000005060',\n", + " '000000286523',\n", + " '000000120420',\n", + " '000000579655',\n", + " '000000117908',\n", + " '000000550322',\n", + " '000000322844',\n", + " '000000218362',\n", + " '000000213224',\n", + " '000000223747',\n", + " '000000297578',\n", + " '000000458992',\n", + " '000000078266',\n", + " '000000164602',\n", + " 
'000000440475',\n", + " '000000101762',\n", + " '000000557501',\n", + " '000000203317',\n", + " '000000368940',\n", + " '000000569917',\n", + " '000000144798',\n", + " '000000284623',\n", + " '000000520301',\n", + " '000000127987',\n", + " '000000063740',\n", + " '000000036494',\n", + " '000000210032',\n", + " '000000488270',\n", + " '000000067180',\n", + " '000000281179',\n", + " '000000064359',\n", + " '000000126226',\n", + " '000000190923',\n", + " '000000150265',\n", + " '000000216739',\n", + " '000000038048',\n", + " '000000354829',\n", + " '000000525155',\n", + " '000000163314',\n", + " '000000259571',\n", + " '000000561679',\n", + " '000000236166',\n", + " '000000153529',\n", + " '000000473015',\n", + " '000000379800',\n", + " '000000253835',\n", + " '000000034071',\n", + " '000000036861',\n", + " '000000569565',\n", + " '000000219271',\n", + " '000000205647',\n", + " '000000460841',\n", + " '000000123131',\n", + " '000000334006',\n", + " '000000511599',\n", + " '000000229858',\n", + " '000000174004',\n", + " '000000519764',\n", + " '000000137576',\n", + " '000000087470',\n", + " '000000009769',\n", + " '000000558114',\n", + " '000000205776',\n", + " '000000163257',\n", + " '000000475678',\n", + " '000000085478',\n", + " '000000318080',\n", + " '000000361551',\n", + " '000000236784',\n", + " '000000092839',\n", + " '000000042296',\n", + " '000000560266',\n", + " '000000486479',\n", + " '000000127955',\n", + " '000000307658',\n", + " '000000417465',\n", + " '000000342971',\n", + " '000000011760',\n", + " '000000069106',\n", + " '000000070158',\n", + " '000000176634',\n", + " '000000281447',\n", + " '000000552371',\n", + " '000000361919',\n", + " '000000560256',\n", + " '000000138115',\n", + " '000000114871',\n", + " '000000374369',\n", + " '000000123213',\n", + " '000000123321',\n", + " '000000015278',\n", + " '000000357742',\n", + " '000000439854',\n", + " '000000465836',\n", + " '000000414385',\n", + " '000000131556',\n", + " '000000322724',\n", + " 
'000000320664',\n", + " '000000481390',\n", + " '000000109916',\n", + " '000000276434',\n", + " '000000579635',\n", + " '000000295316',\n", + " '000000571313',\n", + " '000000183127',\n", + " '000000115898',\n", + " '000000146358',\n", + " '000000329542',\n", + " '000000189752',\n", + " '000000290163',\n", + " '000000091406',\n", + " '000000322352',\n", + " '000000223959',\n", + " '000000326248',\n", + " '000000218439',\n", + " '000000453722',\n", + " '000000293625',\n", + " '000000411817',\n", + " '000000546964',\n", + " '000000215259',\n", + " '000000573094',\n", + " '000000560011',\n", + " '000000038576',\n", + " '000000147729',\n", + " '000000579307',\n", + " '000000154425',\n", + " '000000432898',\n", + " '000000404923',\n", + " '000000130586',\n", + " '000000163057',\n", + " '000000007511',\n", + " '000000067406',\n", + " '000000290179',\n", + " '000000248752',\n", + " '000000054593',\n", + " '000000116208',\n", + " '000000340697',\n", + " '000000450303',\n", + " '000000494427',\n", + " '000000137294',\n", + " '000000410880',\n", + " '000000311180',\n", + " '000000091654',\n", + " '000000181796',\n", + " '000000002431',\n", + " '000000349184',\n", + " '000000298396',\n", + " '000000472046',\n", + " '000000074058',\n", + " '000000058029',\n", + " '000000134096',\n", + " '000000111951',\n", + " '000000103585',\n", + " '000000210273',\n", + " '000000352584',\n", + " '000000446651',\n", + " '000000194875',\n", + " '000000052017',\n", + " '000000336309',\n", + " '000000227478',\n", + " '000000339870',\n", + " '000000080666',\n", + " '000000033707',\n", + " '000000327601',\n", + " '000000255749',\n", + " '000000008762',\n", + " '000000526392',\n", + " '000000535578',\n", + " '000000580757',\n", + " '000000165039',\n", + " '000000148719',\n", + " '000000108440',\n", + " '000000489842',\n", + " '000000579818',\n", + " '000000423229',\n", + " '000000323828',\n", + " '000000166287',\n", + " '000000101420',\n", + " '000000334555',\n", + " '000000196759',\n", + " 
'000000411665',\n", + " '000000061418',\n", + " '000000526751',\n", + " '000000024021',\n", + " '000000277020',\n", + " '000000047828',\n", + " '000000183716',\n", + " '000000271997',\n", + " '000000008532',\n", + " '000000094336',\n", + " '000000390555',\n", + " '000000250282',\n", + " '000000068409',\n", + " '000000002299',\n", + " '000000011051',\n", + " '000000066038',\n", + " '000000360960',\n", + " '000000360097',\n", + " '000000421455',\n", + " '000000504589',\n", + " '000000464522',\n", + " '000000454750',\n", + " '000000509735',\n", + " '000000023034',\n", + " '000000141671',\n", + " '000000506656',\n", + " '000000272566',\n", + " '000000045728',\n", + " '000000424551',\n", + " '000000341719',\n", + " '000000072795',\n", + " '000000078959',\n", + " '000000417285',\n", + " '000000002157',\n", + " '000000043816',\n", + " '000000455555',\n", + " '000000535306',\n", + " '000000030504',\n", + " '000000093353',\n", + " '000000530052',\n", + " '000000473118',\n", + " '000000091779',\n", + " '000000283113',\n", + " '000000226130',\n", + " '000000097278',\n", + " '000000567640',\n", + " '000000532493',\n", + " '000000045550',\n", + " '000000156643',\n", + " '000000430056',\n", + " '000000410456',\n", + " '000000441286',\n", + " '000000279541',\n", + " '000000000885',\n", + " '000000378284',\n", + " '000000156076',\n", + " '000000143572',\n", + " '000000229849',\n", + " '000000039551',\n", + " '000000056344',\n", + " '000000193348',\n", + " '000000016958',\n", + " '000000572678',\n", + " '000000106235',\n", + " '000000341681',\n", + " '000000083172',\n", + " '000000343524',\n", + " '000000395801',\n", + " '000000388056',\n", + " '000000259690',\n", + " '000000235836',\n", + " '000000343218',\n", + " '000000205105',\n", + " '000000513283',\n", + " '000000176446',\n", + " '000000371677',\n", + " '000000308531',\n", + " '000000497599',\n", + " '000000455352',\n", + " '000000236914',\n", + " '000000232684',\n", + " '000000415238',\n", + " '000000290843',\n", + " 
'000000519522',\n", + " '000000144784',\n", + " '000000167486',\n", + " '000000392228',\n", + " '000000488673',\n", + " '000000191013',\n", + " '000000080057',\n", + " '000000570169',\n", + " '000000224807',\n", + " '000000163562',\n", + " '000000136355',\n", + " '000000492362',\n", + " '000000102707',\n", + " '000000232563',\n", + " '000000010977',\n", + " '000000051598',\n", + " '000000032285',\n", + " '000000520910',\n", + " '000000131273',\n", + " '000000206411',\n", + " '000000472375',\n", + " '000000481404',\n", + " '000000471991',\n", + " '000000017436',\n", + " '000000177934',\n", + " '000000165518',\n", + " '000000571718',\n", + " '000000459467',\n", + " '000000135673',\n", + " '000000134886',\n", + " '000000485895',\n", + " '000000287545',\n", + " '000000577182',\n", + " '000000289222',\n", + " '000000372819',\n", + " '000000310072',\n", + " '000000087144',\n", + " '000000430875',\n", + " '000000060347',\n", + " '000000042070',\n", + " '000000420916',\n", + " '000000453584',\n", + " '000000296224',\n", + " '000000122606',\n", + " '000000311909',\n", + " '000000579893',\n", + " '000000284296',\n", + " '000000221017',\n", + " '000000315001',\n", + " '000000439715',\n", + " '000000284991',\n", + " '000000389566',\n", + " '000000078843',\n", + " '000000122927',\n", + " '000000225532',\n", + " '000000013659',\n", + " '000000153568',\n", + " '000000395633',\n", + " '000000419096',\n", + " '000000203488',\n", + " '000000361268',\n", + " '000000466125',\n", + " '000000414795',\n", + " '000000508101',\n", + " '000000253386',\n", + " '000000222991',\n", + " '000000530854',\n", + " '000000351810',\n", + " '000000338624',\n", + " '000000138492',\n", + " '000000263463',\n", + " '000000226592',\n", + " '000000378454',\n", + " '000000020059',\n", + " '000000227686',\n", + " '000000476215',\n", + " '000000297698',\n", + " '000000247917',\n", + " '000000439522',\n", + " '000000479448',\n", + " '000000424721',\n", + " '000000026690',\n", + " '000000558854',\n", + " 
'000000176901',\n", + " '000000334767',\n", + " '000000301563',\n", + " '000000086755',\n", + " '000000194471',\n", + " '000000420281',\n", + " '000000533206',\n", + " '000000099810',\n", + " '000000334483',\n", + " '000000089670',\n", + " '000000482275',\n", + " '000000404805',\n", + " '000000002261',\n", + " '000000425702',\n", + " '000000036844',\n", + " '000000012576',\n", + " '000000361238',\n", + " '000000108253',\n", + " '000000319935',\n", + " '000000003934',\n", + " '000000029596',\n", + " '000000047740',\n", + " '000000077460',\n", + " '000000014439',\n", + " '000000571893',\n", + " '000000447314',\n", + " '000000181303',\n", + " '000000058350',\n", + " '000000026465',\n", + " '000000246968',\n", + " '000000536947',\n", + " '000000076731',\n", + " '000000286182',\n", + " '000000433980',\n", + " '000000561366',\n", + " '000000380913',\n", + " '000000032887',\n", + " '000000517687',\n", + " '000000213035',\n", + " '000000399205',\n", + " '000000349837',\n", + " '000000350002',\n", + " '000000131431',\n", + " '000000356248',\n", + " '000000334399',\n", + " '000000057150',\n", + " '000000363666',\n", + " '000000507235',\n", + " '000000169996',\n", + " '000000226417',\n", + " '000000481573',\n", + " '000000056127',\n", + " '000000123480',\n", + " '000000274687',\n", + " '000000164637',\n", + " '000000178028',\n", + " '000000493286',\n", + " '000000348216',\n", + " '000000345027',\n", + " '000000571804',\n", + " '000000140658',\n", + " '000000102644',\n", + " '000000581615',\n", + " '000000279887',\n", + " '000000230008',\n", + " '000000284698',\n", + " '000000102356',\n", + " '000000456394',\n", + " '000000323709',\n", + " '000000452122',\n", + " '000000579158',\n", + " '000000525322',\n", + " '000000033114',\n", + " '000000008690',\n", + " '000000381639',\n", + " '000000217614',\n", + " '000000284445',\n", + " '000000468124',\n", + " '000000187144',\n", + " '000000273198',\n", + " '000000095843',\n", + " '000000417779',\n", + " '000000447342',\n", + " 
'000000166563',\n", + " '000000490125',\n", + " '000000561009',\n", + " '000000183675',\n", + " '000000290248',\n", + " '000000532058',\n", + " '000000214200',\n", + " '000000578093',\n", + " '000000369751',\n", + " '000000429011',\n", + " '000000301061',\n", + " '000000105264',\n", + " '000000267434',\n", + " '000000370711',\n", + " '000000025393',\n", + " '000000471087',\n", + " '000000106757',\n", + " '000000183648',\n", + " '000000358525',\n", + " '000000049269',\n", + " '000000079144',\n", + " '000000519688',\n", + " '000000431727',\n", + " '000000130699',\n", + " '000000215245',\n", + " '000000091921',\n", + " '000000218424',\n", + " '000000473974',\n", + " '000000405249',\n", + " '000000235784',\n", + " '000000521540',\n", + " '000000537506',\n", + " '000000119445',\n", + " '000000507015',\n", + " '000000173830',\n", + " '000000356498',\n", + " '000000435081',\n", + " '000000018575',\n", + " '000000373315',\n", + " '000000227765',\n", + " '000000013546',\n", + " '000000067310',\n", + " '000000125936',\n", + " '000000389109',\n", + " '000000322211',\n", + " '000000184384',\n", + " '000000426329',\n", + " '000000128476',\n", + " '000000414034',\n", + " '000000450488',\n", + " '000000099182',\n", + " '000000051738',\n", + " '000000099039',\n", + " '000000075456',\n", + " '000000134882',\n", + " '000000442323',\n", + " '000000232489',\n", + " '000000351823',\n", + " '000000065736',\n", + " '000000001000',\n", + " '000000379842',\n", + " '000000013923',\n", + " '000000559543',\n", + " '000000185890',\n", + " '000000357978',\n", + " '000000129492',\n", + " '000000261097',\n", + " '000000410510',\n", + " '000000039951',\n", + " '000000306700',\n", + " '000000146457',\n", + " '000000214224',\n", + " '000000332845',\n", + " '000000255483',\n", + " '000000222455',\n", + " '000000187271',\n", + " '000000462629',\n", + " '000000544565',\n", + " '000000369771',\n", + " '000000035963',\n", + " '000000289516',\n", + " '000000334309',\n", + " '000000452084',\n", + " 
'000000301718',\n", + " '000000429598',\n", + " '000000165257',\n", + " '000000093437',\n", + " '000000413552',\n", + " '000000062025',\n", + " '000000017379',\n", + " '000000176778',\n", + " '000000104572',\n", + " '000000090108',\n", + " '000000157124',\n", + " '000000089556',\n", + " '000000266206',\n", + " '000000086220',\n", + " '000000508602',\n", + " ...]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -126,7 +1135,7 @@ " \n", " return ids\n", "\n", - "ids = get_ids(dataset, subsets[0])" + "get_ids(dataset, subsets[0])" ] }, { @@ -134,80 +1143,2098 @@ "id": "db0e0346", "metadata": {}, "source": [ - "In VOC dataset, there are 'train' and 'test' subset. We will filter only 'train' subset." + "Here we apply the `reindex` transformation so that the `media_id` values increment from `start`." ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 16, "id": "51bf3388", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['0',\n", + " '1',\n", + " '2',\n", + " '3',\n", + " '4',\n", + " '5',\n", + " '6',\n", + " '7',\n", + " '8',\n", + " '9',\n", + " '10',\n", + " '11',\n", + " '12',\n", + " '13',\n", + " '14',\n", + " '15',\n", + " '16',\n", + " '17',\n", + " '18',\n", + " '19',\n", + " '20',\n", + " '21',\n", + " '22',\n", + " '23',\n", + " '24',\n", + " '25',\n", + " '26',\n", + " '27',\n", + " '28',\n", + " '29',\n", + " '30',\n", + " '31',\n", + " '32',\n", + " '33',\n", + " '34',\n", + " '35',\n", + " '36',\n", + " '37',\n", + " '38',\n", + " '39',\n", + " '40',\n", + " '41',\n", + " '42',\n", + " '43',\n", + " '44',\n", + " '45',\n", + " '46',\n", + " '47',\n", + " '48',\n", + " '49',\n", + " '50',\n", + " '51',\n", + " '52',\n", + " '53',\n", + " '54',\n", + " '55',\n", + " '56',\n", + " '57',\n", + " '58',\n", + " '59',\n", + " '60',\n", + " '61',\n", + " '62',\n", + " '63',\n", + " '64',\n", + " '65',\n", + " '66',\n", + " '67',\n", + " '68',\n", + " 
'69',\n", + " '70',\n", + " '71',\n", + " '72',\n", + " '73',\n", + " '74',\n", + " '75',\n", + " '76',\n", + " '77',\n", + " '78',\n", + " '79',\n", + " '80',\n", + " '81',\n", + " '82',\n", + " '83',\n", + " '84',\n", + " '85',\n", + " '86',\n", + " '87',\n", + " '88',\n", + " '89',\n", + " '90',\n", + " '91',\n", + " '92',\n", + " '93',\n", + " '94',\n", + " '95',\n", + " '96',\n", + " '97',\n", + " '98',\n", + " '99',\n", + " '100',\n", + " '101',\n", + " '102',\n", + " '103',\n", + " '104',\n", + " '105',\n", + " '106',\n", + " '107',\n", + " '108',\n", + " '109',\n", + " '110',\n", + " '111',\n", + " '112',\n", + " '113',\n", + " '114',\n", + " '115',\n", + " '116',\n", + " '117',\n", + " '118',\n", + " '119',\n", + " '120',\n", + " '121',\n", + " '122',\n", + " '123',\n", + " '124',\n", + " '125',\n", + " '126',\n", + " '127',\n", + " '128',\n", + " '129',\n", + " '130',\n", + " '131',\n", + " '132',\n", + " '133',\n", + " '134',\n", + " '135',\n", + " '136',\n", + " '137',\n", + " '138',\n", + " '139',\n", + " '140',\n", + " '141',\n", + " '142',\n", + " '143',\n", + " '144',\n", + " '145',\n", + " '146',\n", + " '147',\n", + " '148',\n", + " '149',\n", + " '150',\n", + " '151',\n", + " '152',\n", + " '153',\n", + " '154',\n", + " '155',\n", + " '156',\n", + " '157',\n", + " '158',\n", + " '159',\n", + " '160',\n", + " '161',\n", + " '162',\n", + " '163',\n", + " '164',\n", + " '165',\n", + " '166',\n", + " '167',\n", + " '168',\n", + " '169',\n", + " '170',\n", + " '171',\n", + " '172',\n", + " '173',\n", + " '174',\n", + " '175',\n", + " '176',\n", + " '177',\n", + " '178',\n", + " '179',\n", + " '180',\n", + " '181',\n", + " '182',\n", + " '183',\n", + " '184',\n", + " '185',\n", + " '186',\n", + " '187',\n", + " '188',\n", + " '189',\n", + " '190',\n", + " '191',\n", + " '192',\n", + " '193',\n", + " '194',\n", + " '195',\n", + " '196',\n", + " '197',\n", + " '198',\n", + " '199',\n", + " '200',\n", + " '201',\n", + " '202',\n", + " '203',\n", + " 
'204',\n", + " '205',\n", + " '206',\n", + " '207',\n", + " '208',\n", + " '209',\n", + " '210',\n", + " '211',\n", + " '212',\n", + " '213',\n", + " '214',\n", + " '215',\n", + " '216',\n", + " '217',\n", + " '218',\n", + " '219',\n", + " '220',\n", + " '221',\n", + " '222',\n", + " '223',\n", + " '224',\n", + " '225',\n", + " '226',\n", + " '227',\n", + " '228',\n", + " '229',\n", + " '230',\n", + " '231',\n", + " '232',\n", + " '233',\n", + " '234',\n", + " '235',\n", + " '236',\n", + " '237',\n", + " '238',\n", + " '239',\n", + " '240',\n", + " '241',\n", + " '242',\n", + " '243',\n", + " '244',\n", + " '245',\n", + " '246',\n", + " '247',\n", + " '248',\n", + " '249',\n", + " '250',\n", + " '251',\n", + " '252',\n", + " '253',\n", + " '254',\n", + " '255',\n", + " '256',\n", + " '257',\n", + " '258',\n", + " '259',\n", + " '260',\n", + " '261',\n", + " '262',\n", + " '263',\n", + " '264',\n", + " '265',\n", + " '266',\n", + " '267',\n", + " '268',\n", + " '269',\n", + " '270',\n", + " '271',\n", + " '272',\n", + " '273',\n", + " '274',\n", + " '275',\n", + " '276',\n", + " '277',\n", + " '278',\n", + " '279',\n", + " '280',\n", + " '281',\n", + " '282',\n", + " '283',\n", + " '284',\n", + " '285',\n", + " '286',\n", + " '287',\n", + " '288',\n", + " '289',\n", + " '290',\n", + " '291',\n", + " '292',\n", + " '293',\n", + " '294',\n", + " '295',\n", + " '296',\n", + " '297',\n", + " '298',\n", + " '299',\n", + " '300',\n", + " '301',\n", + " '302',\n", + " '303',\n", + " '304',\n", + " '305',\n", + " '306',\n", + " '307',\n", + " '308',\n", + " '309',\n", + " '310',\n", + " '311',\n", + " '312',\n", + " '313',\n", + " '314',\n", + " '315',\n", + " '316',\n", + " '317',\n", + " '318',\n", + " '319',\n", + " '320',\n", + " '321',\n", + " '322',\n", + " '323',\n", + " '324',\n", + " '325',\n", + " '326',\n", + " '327',\n", + " '328',\n", + " '329',\n", + " '330',\n", + " '331',\n", + " '332',\n", + " '333',\n", + " '334',\n", + " '335',\n", + " '336',\n", + " 
'337',\n", + " '338',\n", + " '339',\n", + " '340',\n", + " '341',\n", + " '342',\n", + " '343',\n", + " '344',\n", + " '345',\n", + " '346',\n", + " '347',\n", + " '348',\n", + " '349',\n", + " '350',\n", + " '351',\n", + " '352',\n", + " '353',\n", + " '354',\n", + " '355',\n", + " '356',\n", + " '357',\n", + " '358',\n", + " '359',\n", + " '360',\n", + " '361',\n", + " '362',\n", + " '363',\n", + " '364',\n", + " '365',\n", + " '366',\n", + " '367',\n", + " '368',\n", + " '369',\n", + " '370',\n", + " '371',\n", + " '372',\n", + " '373',\n", + " '374',\n", + " '375',\n", + " '376',\n", + " '377',\n", + " '378',\n", + " '379',\n", + " '380',\n", + " '381',\n", + " '382',\n", + " '383',\n", + " '384',\n", + " '385',\n", + " '386',\n", + " '387',\n", + " '388',\n", + " '389',\n", + " '390',\n", + " '391',\n", + " '392',\n", + " '393',\n", + " '394',\n", + " '395',\n", + " '396',\n", + " '397',\n", + " '398',\n", + " '399',\n", + " '400',\n", + " '401',\n", + " '402',\n", + " '403',\n", + " '404',\n", + " '405',\n", + " '406',\n", + " '407',\n", + " '408',\n", + " '409',\n", + " '410',\n", + " '411',\n", + " '412',\n", + " '413',\n", + " '414',\n", + " '415',\n", + " '416',\n", + " '417',\n", + " '418',\n", + " '419',\n", + " '420',\n", + " '421',\n", + " '422',\n", + " '423',\n", + " '424',\n", + " '425',\n", + " '426',\n", + " '427',\n", + " '428',\n", + " '429',\n", + " '430',\n", + " '431',\n", + " '432',\n", + " '433',\n", + " '434',\n", + " '435',\n", + " '436',\n", + " '437',\n", + " '438',\n", + " '439',\n", + " '440',\n", + " '441',\n", + " '442',\n", + " '443',\n", + " '444',\n", + " '445',\n", + " '446',\n", + " '447',\n", + " '448',\n", + " '449',\n", + " '450',\n", + " '451',\n", + " '452',\n", + " '453',\n", + " '454',\n", + " '455',\n", + " '456',\n", + " '457',\n", + " '458',\n", + " '459',\n", + " '460',\n", + " '461',\n", + " '462',\n", + " '463',\n", + " '464',\n", + " '465',\n", + " '466',\n", + " '467',\n", + " '468',\n", + " '469',\n", + " 
'470',\n", + " '471',\n", + " '472',\n", + " '473',\n", + " '474',\n", + " '475',\n", + " '476',\n", + " '477',\n", + " '478',\n", + " '479',\n", + " '480',\n", + " '481',\n", + " '482',\n", + " '483',\n", + " '484',\n", + " '485',\n", + " '486',\n", + " '487',\n", + " '488',\n", + " '489',\n", + " '490',\n", + " '491',\n", + " '492',\n", + " '493',\n", + " '494',\n", + " '495',\n", + " '496',\n", + " '497',\n", + " '498',\n", + " '499',\n", + " '500',\n", + " '501',\n", + " '502',\n", + " '503',\n", + " '504',\n", + " '505',\n", + " '506',\n", + " '507',\n", + " '508',\n", + " '509',\n", + " '510',\n", + " '511',\n", + " '512',\n", + " '513',\n", + " '514',\n", + " '515',\n", + " '516',\n", + " '517',\n", + " '518',\n", + " '519',\n", + " '520',\n", + " '521',\n", + " '522',\n", + " '523',\n", + " '524',\n", + " '525',\n", + " '526',\n", + " '527',\n", + " '528',\n", + " '529',\n", + " '530',\n", + " '531',\n", + " '532',\n", + " '533',\n", + " '534',\n", + " '535',\n", + " '536',\n", + " '537',\n", + " '538',\n", + " '539',\n", + " '540',\n", + " '541',\n", + " '542',\n", + " '543',\n", + " '544',\n", + " '545',\n", + " '546',\n", + " '547',\n", + " '548',\n", + " '549',\n", + " '550',\n", + " '551',\n", + " '552',\n", + " '553',\n", + " '554',\n", + " '555',\n", + " '556',\n", + " '557',\n", + " '558',\n", + " '559',\n", + " '560',\n", + " '561',\n", + " '562',\n", + " '563',\n", + " '564',\n", + " '565',\n", + " '566',\n", + " '567',\n", + " '568',\n", + " '569',\n", + " '570',\n", + " '571',\n", + " '572',\n", + " '573',\n", + " '574',\n", + " '575',\n", + " '576',\n", + " '577',\n", + " '578',\n", + " '579',\n", + " '580',\n", + " '581',\n", + " '582',\n", + " '583',\n", + " '584',\n", + " '585',\n", + " '586',\n", + " '587',\n", + " '588',\n", + " '589',\n", + " '590',\n", + " '591',\n", + " '592',\n", + " '593',\n", + " '594',\n", + " '595',\n", + " '596',\n", + " '597',\n", + " '598',\n", + " '599',\n", + " '600',\n", + " '601',\n", + " '602',\n", + " 
'603',\n", + " '604',\n", + " '605',\n", + " '606',\n", + " '607',\n", + " '608',\n", + " '609',\n", + " '610',\n", + " '611',\n", + " '612',\n", + " '613',\n", + " '614',\n", + " '615',\n", + " '616',\n", + " '617',\n", + " '618',\n", + " '619',\n", + " '620',\n", + " '621',\n", + " '622',\n", + " '623',\n", + " '624',\n", + " '625',\n", + " '626',\n", + " '627',\n", + " '628',\n", + " '629',\n", + " '630',\n", + " '631',\n", + " '632',\n", + " '633',\n", + " '634',\n", + " '635',\n", + " '636',\n", + " '637',\n", + " '638',\n", + " '639',\n", + " '640',\n", + " '641',\n", + " '642',\n", + " '643',\n", + " '644',\n", + " '645',\n", + " '646',\n", + " '647',\n", + " '648',\n", + " '649',\n", + " '650',\n", + " '651',\n", + " '652',\n", + " '653',\n", + " '654',\n", + " '655',\n", + " '656',\n", + " '657',\n", + " '658',\n", + " '659',\n", + " '660',\n", + " '661',\n", + " '662',\n", + " '663',\n", + " '664',\n", + " '665',\n", + " '666',\n", + " '667',\n", + " '668',\n", + " '669',\n", + " '670',\n", + " '671',\n", + " '672',\n", + " '673',\n", + " '674',\n", + " '675',\n", + " '676',\n", + " '677',\n", + " '678',\n", + " '679',\n", + " '680',\n", + " '681',\n", + " '682',\n", + " '683',\n", + " '684',\n", + " '685',\n", + " '686',\n", + " '687',\n", + " '688',\n", + " '689',\n", + " '690',\n", + " '691',\n", + " '692',\n", + " '693',\n", + " '694',\n", + " '695',\n", + " '696',\n", + " '697',\n", + " '698',\n", + " '699',\n", + " '700',\n", + " '701',\n", + " '702',\n", + " '703',\n", + " '704',\n", + " '705',\n", + " '706',\n", + " '707',\n", + " '708',\n", + " '709',\n", + " '710',\n", + " '711',\n", + " '712',\n", + " '713',\n", + " '714',\n", + " '715',\n", + " '716',\n", + " '717',\n", + " '718',\n", + " '719',\n", + " '720',\n", + " '721',\n", + " '722',\n", + " '723',\n", + " '724',\n", + " '725',\n", + " '726',\n", + " '727',\n", + " '728',\n", + " '729',\n", + " '730',\n", + " '731',\n", + " '732',\n", + " '733',\n", + " '734',\n", + " '735',\n", + " 
'736',\n", + " '737',\n", + " '738',\n", + " '739',\n", + " '740',\n", + " '741',\n", + " '742',\n", + " '743',\n", + " '744',\n", + " '745',\n", + " '746',\n", + " '747',\n", + " '748',\n", + " '749',\n", + " '750',\n", + " '751',\n", + " '752',\n", + " '753',\n", + " '754',\n", + " '755',\n", + " '756',\n", + " '757',\n", + " '758',\n", + " '759',\n", + " '760',\n", + " '761',\n", + " '762',\n", + " '763',\n", + " '764',\n", + " '765',\n", + " '766',\n", + " '767',\n", + " '768',\n", + " '769',\n", + " '770',\n", + " '771',\n", + " '772',\n", + " '773',\n", + " '774',\n", + " '775',\n", + " '776',\n", + " '777',\n", + " '778',\n", + " '779',\n", + " '780',\n", + " '781',\n", + " '782',\n", + " '783',\n", + " '784',\n", + " '785',\n", + " '786',\n", + " '787',\n", + " '788',\n", + " '789',\n", + " '790',\n", + " '791',\n", + " '792',\n", + " '793',\n", + " '794',\n", + " '795',\n", + " '796',\n", + " '797',\n", + " '798',\n", + " '799',\n", + " '800',\n", + " '801',\n", + " '802',\n", + " '803',\n", + " '804',\n", + " '805',\n", + " '806',\n", + " '807',\n", + " '808',\n", + " '809',\n", + " '810',\n", + " '811',\n", + " '812',\n", + " '813',\n", + " '814',\n", + " '815',\n", + " '816',\n", + " '817',\n", + " '818',\n", + " '819',\n", + " '820',\n", + " '821',\n", + " '822',\n", + " '823',\n", + " '824',\n", + " '825',\n", + " '826',\n", + " '827',\n", + " '828',\n", + " '829',\n", + " '830',\n", + " '831',\n", + " '832',\n", + " '833',\n", + " '834',\n", + " '835',\n", + " '836',\n", + " '837',\n", + " '838',\n", + " '839',\n", + " '840',\n", + " '841',\n", + " '842',\n", + " '843',\n", + " '844',\n", + " '845',\n", + " '846',\n", + " '847',\n", + " '848',\n", + " '849',\n", + " '850',\n", + " '851',\n", + " '852',\n", + " '853',\n", + " '854',\n", + " '855',\n", + " '856',\n", + " '857',\n", + " '858',\n", + " '859',\n", + " '860',\n", + " '861',\n", + " '862',\n", + " '863',\n", + " '864',\n", + " '865',\n", + " '866',\n", + " '867',\n", + " '868',\n", + " 
'869',\n", + " '870',\n", + " '871',\n", + " '872',\n", + " '873',\n", + " '874',\n", + " '875',\n", + " '876',\n", + " '877',\n", + " '878',\n", + " '879',\n", + " '880',\n", + " '881',\n", + " '882',\n", + " '883',\n", + " '884',\n", + " '885',\n", + " '886',\n", + " '887',\n", + " '888',\n", + " '889',\n", + " '890',\n", + " '891',\n", + " '892',\n", + " '893',\n", + " '894',\n", + " '895',\n", + " '896',\n", + " '897',\n", + " '898',\n", + " '899',\n", + " '900',\n", + " '901',\n", + " '902',\n", + " '903',\n", + " '904',\n", + " '905',\n", + " '906',\n", + " '907',\n", + " '908',\n", + " '909',\n", + " '910',\n", + " '911',\n", + " '912',\n", + " '913',\n", + " '914',\n", + " '915',\n", + " '916',\n", + " '917',\n", + " '918',\n", + " '919',\n", + " '920',\n", + " '921',\n", + " '922',\n", + " '923',\n", + " '924',\n", + " '925',\n", + " '926',\n", + " '927',\n", + " '928',\n", + " '929',\n", + " '930',\n", + " '931',\n", + " '932',\n", + " '933',\n", + " '934',\n", + " '935',\n", + " '936',\n", + " '937',\n", + " '938',\n", + " '939',\n", + " '940',\n", + " '941',\n", + " '942',\n", + " '943',\n", + " '944',\n", + " '945',\n", + " '946',\n", + " '947',\n", + " '948',\n", + " '949',\n", + " '950',\n", + " '951',\n", + " '952',\n", + " '953',\n", + " '954',\n", + " '955',\n", + " '956',\n", + " '957',\n", + " '958',\n", + " '959',\n", + " '960',\n", + " '961',\n", + " '962',\n", + " '963',\n", + " '964',\n", + " '965',\n", + " '966',\n", + " '967',\n", + " '968',\n", + " '969',\n", + " '970',\n", + " '971',\n", + " '972',\n", + " '973',\n", + " '974',\n", + " '975',\n", + " '976',\n", + " '977',\n", + " '978',\n", + " '979',\n", + " '980',\n", + " '981',\n", + " '982',\n", + " '983',\n", + " '984',\n", + " '985',\n", + " '986',\n", + " '987',\n", + " '988',\n", + " '989',\n", + " '990',\n", + " '991',\n", + " '992',\n", + " '993',\n", + " '994',\n", + " '995',\n", + " '996',\n", + " '997',\n", + " '998',\n", + " '999',\n", + " ...]" + ] + }, + 
"execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# dataset.transform(\"masks_to_polygons\")\n", "reindexing_dataset = dataset.transform(\"reindex\", start=0)\n", - "\n", - "ids = get_ids(reindexing_dataset, subsets[0])" + "get_ids(reindexing_dataset, subsets[0])" + ] + }, + { + "cell_type": "markdown", + "id": "a77fbadc", + "metadata": {}, + "source": [ + "By adopting `id_from_image_name`, we can rollback the `media_id` to be the media name." ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 17, "id": "fb608396", "metadata": { "scrolled": true }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset\n", - "\tsize=123287\n", - "\tsource_path=coco_dataset\n", - "\tmedia_type=\n", - "\tannotated_items_count=122218\n", - "\tannotations_count=1018861\n", - "subsets\n", - "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", - "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", - "categories\n", - "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 
'hair drier', 'toothbrush']\n", - "\n" - ] + "data": { + "text/plain": [ + "['000000397133',\n", + " '000000037777',\n", + " '000000252219',\n", + " '000000087038',\n", + " '000000174482',\n", + " '000000403385',\n", + " '000000006818',\n", + " '000000480985',\n", + " '000000458054',\n", + " '000000331352',\n", + " '000000296649',\n", + " '000000386912',\n", + " '000000502136',\n", + " '000000491497',\n", + " '000000184791',\n", + " '000000348881',\n", + " '000000289393',\n", + " '000000522713',\n", + " '000000181666',\n", + " '000000017627',\n", + " '000000143931',\n", + " '000000303818',\n", + " '000000463730',\n", + " '000000460347',\n", + " '000000322864',\n", + " '000000226111',\n", + " '000000153299',\n", + " '000000308394',\n", + " '000000456496',\n", + " '000000058636',\n", + " '000000041888',\n", + " '000000184321',\n", + " '000000565778',\n", + " '000000297343',\n", + " '000000336587',\n", + " '000000122745',\n", + " '000000219578',\n", + " '000000555705',\n", + " '000000443303',\n", + " '000000500663',\n", + " '000000418281',\n", + " '000000025560',\n", + " '000000403817',\n", + " '000000085329',\n", + " '000000329323',\n", + " '000000239274',\n", + " '000000286994',\n", + " '000000511321',\n", + " '000000314294',\n", + " '000000233771',\n", + " '000000475779',\n", + " '000000301867',\n", + " '000000312421',\n", + " '000000185250',\n", + " '000000356427',\n", + " '000000572517',\n", + " '000000270244',\n", + " '000000516316',\n", + " '000000125211',\n", + " '000000562121',\n", + " '000000360661',\n", + " '000000016228',\n", + " '000000382088',\n", + " '000000266409',\n", + " '000000430961',\n", + " '000000080671',\n", + " '000000577539',\n", + " '000000104612',\n", + " '000000476258',\n", + " '000000448365',\n", + " '000000035197',\n", + " '000000349860',\n", + " '000000180135',\n", + " '000000486438',\n", + " '000000400573',\n", + " '000000109798',\n", + " '000000370677',\n", + " '000000238866',\n", + " '000000369370',\n", + " '000000502737',\n", + " 
'000000515579',\n", + " '000000515445',\n", + " '000000173383',\n", + " '000000438862',\n", + " '000000180560',\n", + " '000000347693',\n", + " '000000039956',\n", + " '000000321214',\n", + " '000000474028',\n", + " '000000066523',\n", + " '000000355257',\n", + " '000000142092',\n", + " '000000063154',\n", + " '000000199551',\n", + " '000000239347',\n", + " '000000514508',\n", + " '000000473237',\n", + " '000000228144',\n", + " '000000206027',\n", + " '000000078915',\n", + " '000000551215',\n", + " '000000544519',\n", + " '000000096493',\n", + " '000000023899',\n", + " '000000340175',\n", + " '000000578500',\n", + " '000000366141',\n", + " '000000057597',\n", + " '000000559842',\n", + " '000000434230',\n", + " '000000428454',\n", + " '000000399462',\n", + " '000000261061',\n", + " '000000168330',\n", + " '000000383384',\n", + " '000000342006',\n", + " '000000217285',\n", + " '000000236412',\n", + " '000000524456',\n", + " '000000153343',\n", + " '000000095786',\n", + " '000000326541',\n", + " '000000213086',\n", + " '000000231339',\n", + " '000000508730',\n", + " '000000550426',\n", + " '000000368294',\n", + " '000000171190',\n", + " '000000301135',\n", + " '000000580294',\n", + " '000000494869',\n", + " '000000033638',\n", + " '000000329219',\n", + " '000000034873',\n", + " '000000186980',\n", + " '000000127182',\n", + " '000000356387',\n", + " '000000367680',\n", + " '000000263796',\n", + " '000000117425',\n", + " '000000365387',\n", + " '000000487583',\n", + " '000000504711',\n", + " '000000363840',\n", + " '000000214720',\n", + " '000000379453',\n", + " '000000311295',\n", + " '000000029393',\n", + " '000000278848',\n", + " '000000166391',\n", + " '000000048153',\n", + " '000000459153',\n", + " '000000295713',\n", + " '000000223130',\n", + " '000000273132',\n", + " '000000198960',\n", + " '000000344059',\n", + " '000000410428',\n", + " '000000087875',\n", + " '000000450758',\n", + " '000000458790',\n", + " '000000460160',\n", + " '000000458109',\n", + " 
'000000030675',\n", + " '000000566524',\n", + " '000000338428',\n", + " '000000545826',\n", + " '000000166277',\n", + " '000000269314',\n", + " '000000476415',\n", + " '000000292082',\n", + " '000000360137',\n", + " '000000122046',\n", + " '000000352684',\n", + " '000000512836',\n", + " '000000008021',\n", + " '000000107226',\n", + " '000000084477',\n", + " '000000562243',\n", + " '000000181859',\n", + " '000000177015',\n", + " '000000292236',\n", + " '000000121506',\n", + " '000000288042',\n", + " '000000453860',\n", + " '000000500257',\n", + " '000000113403',\n", + " '000000125062',\n", + " '000000375015',\n", + " '000000334719',\n", + " '000000134112',\n", + " '000000283520',\n", + " '000000031269',\n", + " '000000319721',\n", + " '000000165351',\n", + " '000000347265',\n", + " '000000414170',\n", + " '000000231508',\n", + " '000000389381',\n", + " '000000118921',\n", + " '000000021503',\n", + " '000000000785',\n", + " '000000300842',\n", + " '000000105014',\n", + " '000000261982',\n", + " '000000034205',\n", + " '000000099242',\n", + " '000000314709',\n", + " '000000460494',\n", + " '000000339442',\n", + " '000000541055',\n", + " '000000409475',\n", + " '000000464786',\n", + " '000000378605',\n", + " '000000331817',\n", + " '000000218091',\n", + " '000000578545',\n", + " '000000363207',\n", + " '000000372577',\n", + " '000000212166',\n", + " '000000172571',\n", + " '000000294831',\n", + " '000000084431',\n", + " '000000323355',\n", + " '000000355325',\n", + " '000000100582',\n", + " '000000555412',\n", + " '000000004495',\n", + " '000000009483',\n", + " '000000326082',\n", + " '000000398237',\n", + " '000000507223',\n", + " '000000031050',\n", + " '000000239537',\n", + " '000000340930',\n", + " '000000011813',\n", + " '000000281414',\n", + " '000000537991',\n", + " '000000284282',\n", + " '000000321333',\n", + " '000000521282',\n", + " '000000108026',\n", + " '000000243204',\n", + " '000000177935',\n", + " '000000038829',\n", + " '000000397327',\n", + " 
'000000501523',\n", + " '000000555050',\n", + " '000000376442',\n", + " '000000187243',\n", + " '000000356347',\n", + " '000000293044',\n", + " '000000560279',\n", + " '000000042276',\n", + " '000000534827',\n", + " '000000190756',\n", + " '000000482917',\n", + " '000000300659',\n", + " '000000199977',\n", + " '000000442480',\n", + " '000000384350',\n", + " '000000383621',\n", + " '000000189828',\n", + " '000000412894',\n", + " '000000537153',\n", + " '000000361103',\n", + " '000000392722',\n", + " '000000338560',\n", + " '000000264535',\n", + " '000000295231',\n", + " '000000154947',\n", + " '000000212559',\n", + " '000000458755',\n", + " '000000104782',\n", + " '000000315257',\n", + " '000000130599',\n", + " '000000227187',\n", + " '000000151662',\n", + " '000000461275',\n", + " '000000523811',\n", + " '000000456559',\n", + " '000000101068',\n", + " '000000140640',\n", + " '000000516708',\n", + " '000000544605',\n", + " '000000385190',\n", + " '000000338986',\n", + " '000000053994',\n", + " '000000061171',\n", + " '000000314034',\n", + " '000000291490',\n", + " '000000152740',\n", + " '000000024919',\n", + " '000000079837',\n", + " '000000021903',\n", + " '000000564133',\n", + " '000000337055',\n", + " '000000110638',\n", + " '000000034139',\n", + " '000000080340',\n", + " '000000083113',\n", + " '000000173033',\n", + " '000000255664',\n", + " '000000072813',\n", + " '000000545129',\n", + " '000000546011',\n", + " '000000121031',\n", + " '000000172547',\n", + " '000000369081',\n", + " '000000509131',\n", + " '000000578922',\n", + " '000000464089',\n", + " '000000453708',\n", + " '000000177714',\n", + " '000000459887',\n", + " '000000155179',\n", + " '000000261116',\n", + " '000000396274',\n", + " '000000029640',\n", + " '000000141328',\n", + " '000000308430',\n", + " '000000043314',\n", + " '000000273715',\n", + " '000000456303',\n", + " '000000406611',\n", + " '000000475064',\n", + " '000000466567',\n", + " '000000137246',\n", + " '000000015079',\n", + " 
'000000296284',\n", + " '000000226147',\n", + " '000000226903',\n", + " '000000127517',\n", + " '000000162092',\n", + " '000000131379',\n", + " '000000366611',\n", + " '000000263969',\n", + " '000000551439',\n", + " '000000474167',\n", + " '000000159458',\n", + " '000000554735',\n", + " '000000099428',\n", + " '000000386352',\n", + " '000000173004',\n", + " '000000311394',\n", + " '000000578489',\n", + " '000000189310',\n", + " '000000491366',\n", + " '000000448076',\n", + " '000000293804',\n", + " '000000312237',\n", + " '000000221291',\n", + " '000000141821',\n", + " '000000410650',\n", + " '000000199310',\n", + " '000000323151',\n", + " '000000089648',\n", + " '000000219283',\n", + " '000000471869',\n", + " '000000520264',\n", + " '000000111179',\n", + " '000000151000',\n", + " '000000100624',\n", + " '000000332570',\n", + " '000000057238',\n", + " '000000502732',\n", + " '000000135561',\n", + " '000000008277',\n", + " '000000173044',\n", + " '000000168458',\n", + " '000000512194',\n", + " '000000370042',\n", + " '000000189436',\n", + " '000000533958',\n", + " '000000117645',\n", + " '000000221708',\n", + " '000000202228',\n", + " '000000403565',\n", + " '000000211042',\n", + " '000000492878',\n", + " '000000441586',\n", + " '000000547816',\n", + " '000000306733',\n", + " '000000530099',\n", + " '000000312278',\n", + " '000000097679',\n", + " '000000564127',\n", + " '000000251065',\n", + " '000000003845',\n", + " '000000138819',\n", + " '000000205834',\n", + " '000000348708',\n", + " '000000166521',\n", + " '000000485802',\n", + " '000000099054',\n", + " '000000022969',\n", + " '000000570539',\n", + " '000000278353',\n", + " '000000158548',\n", + " '000000461405',\n", + " '000000176606',\n", + " '000000044699',\n", + " '000000559956',\n", + " '000000268996',\n", + " '000000011197',\n", + " '000000483667',\n", + " '000000448810',\n", + " '000000000724',\n", + " '000000051961',\n", + " '000000375278',\n", + " '000000302165',\n", + " '000000131131',\n", + " 
'000000098839',\n", + " '000000402992',\n", + " '000000465675',\n", + " '000000240754',\n", + " '000000021167',\n", + " '000000148730',\n", + " '000000384468',\n", + " '000000253742',\n", + " '000000186873',\n", + " '000000082180',\n", + " '000000446522',\n", + " '000000552902',\n", + " '000000125405',\n", + " '000000110211',\n", + " '000000016010',\n", + " '000000064462',\n", + " '000000314182',\n", + " '000000248980',\n", + " '000000068387',\n", + " '000000429281',\n", + " '000000345466',\n", + " '000000352900',\n", + " '000000118367',\n", + " '000000113235',\n", + " '000000311303',\n", + " '000000163640',\n", + " '000000370999',\n", + " '000000001490',\n", + " '000000329456',\n", + " '000000570471',\n", + " '000000088269',\n", + " '000000260470',\n", + " '000000193494',\n", + " '000000252776',\n", + " '000000201072',\n", + " '000000018150',\n", + " '000000337498',\n", + " '000000521405',\n", + " '000000518770',\n", + " '000000201646',\n", + " '000000036936',\n", + " '000000059044',\n", + " '000000172946',\n", + " '000000234607',\n", + " '000000532690',\n", + " '000000323895',\n", + " '000000384670',\n", + " '000000050326',\n", + " '000000205542',\n", + " '000000217957',\n", + " '000000162035',\n", + " '000000415727',\n", + " '000000046252',\n", + " '000000182021',\n", + " '000000231747',\n", + " '000000090284',\n", + " '000000286553',\n", + " '000000488736',\n", + " '000000063602',\n", + " '000000383386',\n", + " '000000450686',\n", + " '000000005060',\n", + " '000000286523',\n", + " '000000120420',\n", + " '000000579655',\n", + " '000000117908',\n", + " '000000550322',\n", + " '000000322844',\n", + " '000000218362',\n", + " '000000213224',\n", + " '000000223747',\n", + " '000000297578',\n", + " '000000458992',\n", + " '000000078266',\n", + " '000000164602',\n", + " '000000440475',\n", + " '000000101762',\n", + " '000000557501',\n", + " '000000203317',\n", + " '000000368940',\n", + " '000000569917',\n", + " '000000144798',\n", + " '000000284623',\n", + " 
'000000520301',\n", + " '000000127987',\n", + " '000000063740',\n", + " '000000036494',\n", + " '000000210032',\n", + " '000000488270',\n", + " '000000067180',\n", + " '000000281179',\n", + " '000000064359',\n", + " '000000126226',\n", + " '000000190923',\n", + " '000000150265',\n", + " '000000216739',\n", + " '000000038048',\n", + " '000000354829',\n", + " '000000525155',\n", + " '000000163314',\n", + " '000000259571',\n", + " '000000561679',\n", + " '000000236166',\n", + " '000000153529',\n", + " '000000473015',\n", + " '000000379800',\n", + " '000000253835',\n", + " '000000034071',\n", + " '000000036861',\n", + " '000000569565',\n", + " '000000219271',\n", + " '000000205647',\n", + " '000000460841',\n", + " '000000123131',\n", + " '000000334006',\n", + " '000000511599',\n", + " '000000229858',\n", + " '000000174004',\n", + " '000000519764',\n", + " '000000137576',\n", + " '000000087470',\n", + " '000000009769',\n", + " '000000558114',\n", + " '000000205776',\n", + " '000000163257',\n", + " '000000475678',\n", + " '000000085478',\n", + " '000000318080',\n", + " '000000361551',\n", + " '000000236784',\n", + " '000000092839',\n", + " '000000042296',\n", + " '000000560266',\n", + " '000000486479',\n", + " '000000127955',\n", + " '000000307658',\n", + " '000000417465',\n", + " '000000342971',\n", + " '000000011760',\n", + " '000000069106',\n", + " '000000070158',\n", + " '000000176634',\n", + " '000000281447',\n", + " '000000552371',\n", + " '000000361919',\n", + " '000000560256',\n", + " '000000138115',\n", + " '000000114871',\n", + " '000000374369',\n", + " '000000123213',\n", + " '000000123321',\n", + " '000000015278',\n", + " '000000357742',\n", + " '000000439854',\n", + " '000000465836',\n", + " '000000414385',\n", + " '000000131556',\n", + " '000000322724',\n", + " '000000320664',\n", + " '000000481390',\n", + " '000000109916',\n", + " '000000276434',\n", + " '000000579635',\n", + " '000000295316',\n", + " '000000571313',\n", + " '000000183127',\n", + " 
'000000115898',\n", + " '000000146358',\n", + " '000000329542',\n", + " '000000189752',\n", + " '000000290163',\n", + " '000000091406',\n", + " '000000322352',\n", + " '000000223959',\n", + " '000000326248',\n", + " '000000218439',\n", + " '000000453722',\n", + " '000000293625',\n", + " '000000411817',\n", + " '000000546964',\n", + " '000000215259',\n", + " '000000573094',\n", + " '000000560011',\n", + " '000000038576',\n", + " '000000147729',\n", + " '000000579307',\n", + " '000000154425',\n", + " '000000432898',\n", + " '000000404923',\n", + " '000000130586',\n", + " '000000163057',\n", + " '000000007511',\n", + " '000000067406',\n", + " '000000290179',\n", + " '000000248752',\n", + " '000000054593',\n", + " '000000116208',\n", + " '000000340697',\n", + " '000000450303',\n", + " '000000494427',\n", + " '000000137294',\n", + " '000000410880',\n", + " '000000311180',\n", + " '000000091654',\n", + " '000000181796',\n", + " '000000002431',\n", + " '000000349184',\n", + " '000000298396',\n", + " '000000472046',\n", + " '000000074058',\n", + " '000000058029',\n", + " '000000134096',\n", + " '000000111951',\n", + " '000000103585',\n", + " '000000210273',\n", + " '000000352584',\n", + " '000000446651',\n", + " '000000194875',\n", + " '000000052017',\n", + " '000000336309',\n", + " '000000227478',\n", + " '000000339870',\n", + " '000000080666',\n", + " '000000033707',\n", + " '000000327601',\n", + " '000000255749',\n", + " '000000008762',\n", + " '000000526392',\n", + " '000000535578',\n", + " '000000580757',\n", + " '000000165039',\n", + " '000000148719',\n", + " '000000108440',\n", + " '000000489842',\n", + " '000000579818',\n", + " '000000423229',\n", + " '000000323828',\n", + " '000000166287',\n", + " '000000101420',\n", + " '000000334555',\n", + " '000000196759',\n", + " '000000411665',\n", + " '000000061418',\n", + " '000000526751',\n", + " '000000024021',\n", + " '000000277020',\n", + " '000000047828',\n", + " '000000183716',\n", + " '000000271997',\n", + " 
'000000008532',\n", + " '000000094336',\n", + " '000000390555',\n", + " '000000250282',\n", + " '000000068409',\n", + " '000000002299',\n", + " '000000011051',\n", + " '000000066038',\n", + " '000000360960',\n", + " '000000360097',\n", + " '000000421455',\n", + " '000000504589',\n", + " '000000464522',\n", + " '000000454750',\n", + " '000000509735',\n", + " '000000023034',\n", + " '000000141671',\n", + " '000000506656',\n", + " '000000272566',\n", + " '000000045728',\n", + " '000000424551',\n", + " '000000341719',\n", + " '000000072795',\n", + " '000000078959',\n", + " '000000417285',\n", + " '000000002157',\n", + " '000000043816',\n", + " '000000455555',\n", + " '000000535306',\n", + " '000000030504',\n", + " '000000093353',\n", + " '000000530052',\n", + " '000000473118',\n", + " '000000091779',\n", + " '000000283113',\n", + " '000000226130',\n", + " '000000097278',\n", + " '000000567640',\n", + " '000000532493',\n", + " '000000045550',\n", + " '000000156643',\n", + " '000000430056',\n", + " '000000410456',\n", + " '000000441286',\n", + " '000000279541',\n", + " '000000000885',\n", + " '000000378284',\n", + " '000000156076',\n", + " '000000143572',\n", + " '000000229849',\n", + " '000000039551',\n", + " '000000056344',\n", + " '000000193348',\n", + " '000000016958',\n", + " '000000572678',\n", + " '000000106235',\n", + " '000000341681',\n", + " '000000083172',\n", + " '000000343524',\n", + " '000000395801',\n", + " '000000388056',\n", + " '000000259690',\n", + " '000000235836',\n", + " '000000343218',\n", + " '000000205105',\n", + " '000000513283',\n", + " '000000176446',\n", + " '000000371677',\n", + " '000000308531',\n", + " '000000497599',\n", + " '000000455352',\n", + " '000000236914',\n", + " '000000232684',\n", + " '000000415238',\n", + " '000000290843',\n", + " '000000519522',\n", + " '000000144784',\n", + " '000000167486',\n", + " '000000392228',\n", + " '000000488673',\n", + " '000000191013',\n", + " '000000080057',\n", + " '000000570169',\n", + " 
'000000224807',\n", + " '000000163562',\n", + " '000000136355',\n", + " '000000492362',\n", + " '000000102707',\n", + " '000000232563',\n", + " '000000010977',\n", + " '000000051598',\n", + " '000000032285',\n", + " '000000520910',\n", + " '000000131273',\n", + " '000000206411',\n", + " '000000472375',\n", + " '000000481404',\n", + " '000000471991',\n", + " '000000017436',\n", + " '000000177934',\n", + " '000000165518',\n", + " '000000571718',\n", + " '000000459467',\n", + " '000000135673',\n", + " '000000134886',\n", + " '000000485895',\n", + " '000000287545',\n", + " '000000577182',\n", + " '000000289222',\n", + " '000000372819',\n", + " '000000310072',\n", + " '000000087144',\n", + " '000000430875',\n", + " '000000060347',\n", + " '000000042070',\n", + " '000000420916',\n", + " '000000453584',\n", + " '000000296224',\n", + " '000000122606',\n", + " '000000311909',\n", + " '000000579893',\n", + " '000000284296',\n", + " '000000221017',\n", + " '000000315001',\n", + " '000000439715',\n", + " '000000284991',\n", + " '000000389566',\n", + " '000000078843',\n", + " '000000122927',\n", + " '000000225532',\n", + " '000000013659',\n", + " '000000153568',\n", + " '000000395633',\n", + " '000000419096',\n", + " '000000203488',\n", + " '000000361268',\n", + " '000000466125',\n", + " '000000414795',\n", + " '000000508101',\n", + " '000000253386',\n", + " '000000222991',\n", + " '000000530854',\n", + " '000000351810',\n", + " '000000338624',\n", + " '000000138492',\n", + " '000000263463',\n", + " '000000226592',\n", + " '000000378454',\n", + " '000000020059',\n", + " '000000227686',\n", + " '000000476215',\n", + " '000000297698',\n", + " '000000247917',\n", + " '000000439522',\n", + " '000000479448',\n", + " '000000424721',\n", + " '000000026690',\n", + " '000000558854',\n", + " '000000176901',\n", + " '000000334767',\n", + " '000000301563',\n", + " '000000086755',\n", + " '000000194471',\n", + " '000000420281',\n", + " '000000533206',\n", + " '000000099810',\n", + " 
'000000334483',\n", + " '000000089670',\n", + " '000000482275',\n", + " '000000404805',\n", + " '000000002261',\n", + " '000000425702',\n", + " '000000036844',\n", + " '000000012576',\n", + " '000000361238',\n", + " '000000108253',\n", + " '000000319935',\n", + " '000000003934',\n", + " '000000029596',\n", + " '000000047740',\n", + " '000000077460',\n", + " '000000014439',\n", + " '000000571893',\n", + " '000000447314',\n", + " '000000181303',\n", + " '000000058350',\n", + " '000000026465',\n", + " '000000246968',\n", + " '000000536947',\n", + " '000000076731',\n", + " '000000286182',\n", + " '000000433980',\n", + " '000000561366',\n", + " '000000380913',\n", + " '000000032887',\n", + " '000000517687',\n", + " '000000213035',\n", + " '000000399205',\n", + " '000000349837',\n", + " '000000350002',\n", + " '000000131431',\n", + " '000000356248',\n", + " '000000334399',\n", + " '000000057150',\n", + " '000000363666',\n", + " '000000507235',\n", + " '000000169996',\n", + " '000000226417',\n", + " '000000481573',\n", + " '000000056127',\n", + " '000000123480',\n", + " '000000274687',\n", + " '000000164637',\n", + " '000000178028',\n", + " '000000493286',\n", + " '000000348216',\n", + " '000000345027',\n", + " '000000571804',\n", + " '000000140658',\n", + " '000000102644',\n", + " '000000581615',\n", + " '000000279887',\n", + " '000000230008',\n", + " '000000284698',\n", + " '000000102356',\n", + " '000000456394',\n", + " '000000323709',\n", + " '000000452122',\n", + " '000000579158',\n", + " '000000525322',\n", + " '000000033114',\n", + " '000000008690',\n", + " '000000381639',\n", + " '000000217614',\n", + " '000000284445',\n", + " '000000468124',\n", + " '000000187144',\n", + " '000000273198',\n", + " '000000095843',\n", + " '000000417779',\n", + " '000000447342',\n", + " '000000166563',\n", + " '000000490125',\n", + " '000000561009',\n", + " '000000183675',\n", + " '000000290248',\n", + " '000000532058',\n", + " '000000214200',\n", + " '000000578093',\n", + " 
'000000369751',\n", + " '000000429011',\n", + " '000000301061',\n", + " '000000105264',\n", + " '000000267434',\n", + " '000000370711',\n", + " '000000025393',\n", + " '000000471087',\n", + " '000000106757',\n", + " '000000183648',\n", + " '000000358525',\n", + " '000000049269',\n", + " '000000079144',\n", + " '000000519688',\n", + " '000000431727',\n", + " '000000130699',\n", + " '000000215245',\n", + " '000000091921',\n", + " '000000218424',\n", + " '000000473974',\n", + " '000000405249',\n", + " '000000235784',\n", + " '000000521540',\n", + " '000000537506',\n", + " '000000119445',\n", + " '000000507015',\n", + " '000000173830',\n", + " '000000356498',\n", + " '000000435081',\n", + " '000000018575',\n", + " '000000373315',\n", + " '000000227765',\n", + " '000000013546',\n", + " '000000067310',\n", + " '000000125936',\n", + " '000000389109',\n", + " '000000322211',\n", + " '000000184384',\n", + " '000000426329',\n", + " '000000128476',\n", + " '000000414034',\n", + " '000000450488',\n", + " '000000099182',\n", + " '000000051738',\n", + " '000000099039',\n", + " '000000075456',\n", + " '000000134882',\n", + " '000000442323',\n", + " '000000232489',\n", + " '000000351823',\n", + " '000000065736',\n", + " '000000001000',\n", + " '000000379842',\n", + " '000000013923',\n", + " '000000559543',\n", + " '000000185890',\n", + " '000000357978',\n", + " '000000129492',\n", + " '000000261097',\n", + " '000000410510',\n", + " '000000039951',\n", + " '000000306700',\n", + " '000000146457',\n", + " '000000214224',\n", + " '000000332845',\n", + " '000000255483',\n", + " '000000222455',\n", + " '000000187271',\n", + " '000000462629',\n", + " '000000544565',\n", + " '000000369771',\n", + " '000000035963',\n", + " '000000289516',\n", + " '000000334309',\n", + " '000000452084',\n", + " '000000301718',\n", + " '000000429598',\n", + " '000000165257',\n", + " '000000093437',\n", + " '000000413552',\n", + " '000000062025',\n", + " '000000017379',\n", + " '000000176778',\n", + " 
'000000104572',\n", + " '000000090108',\n", + " '000000157124',\n", + " '000000089556',\n", + " '000000266206',\n", + " '000000086220',\n", + " '000000508602',\n", + " ...]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "rollback_dataset = dataset.transform(\"id_from_image_name\")\n", + "get_ids(rollback_dataset, subsets[0])" + ] + }, + { + "cell_type": "markdown", + "id": "a1e290fe", + "metadata": {}, + "source": [ + "### Transform annotation\n", "\n", - "ids = get_ids(rollback_dataset, subsets[0])\n", - "print(dataset)" + "For task chaining or merging multiple heterogeneous datasets, we need to redefine the class definitions. Datumaro provides this class redefinition through `remap_labels` as below. " ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 18, "id": "a2515d03", "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "__init__() got an unexpected keyword argument 'regex'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/core/formatters.py:706\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 699\u001b[0m stream \u001b[39m=\u001b[39m StringIO()\n\u001b[1;32m 700\u001b[0m printer \u001b[39m=\u001b[39m pretty\u001b[39m.\u001b[39mRepresentationPrinter(stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mverbose,\n\u001b[1;32m 701\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_width, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnewline,\n\u001b[1;32m 702\u001b[0m max_seq_length\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmax_seq_length,\n\u001b[1;32m 703\u001b[0m 
singleton_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39msingleton_printers,\n\u001b[1;32m 704\u001b[0m type_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtype_printers,\n\u001b[1;32m 705\u001b[0m deferred_pprinters\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 706\u001b[0m printer\u001b[39m.\u001b[39;49mpretty(obj)\n\u001b[1;32m 707\u001b[0m printer\u001b[39m.\u001b[39mflush()\n\u001b[1;32m 708\u001b[0m \u001b[39mreturn\u001b[39;00m stream\u001b[39m.\u001b[39mgetvalue()\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[39mreturn\u001b[39;00m meth(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m 408\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mobject\u001b[39m \\\n\u001b[1;32m 409\u001b[0m \u001b[39mand\u001b[39;00m callable(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39m__repr__\u001b[39m\u001b[39m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m \u001b[39mreturn\u001b[39;00m _repr_pprint(obj, \u001b[39mself\u001b[39;49m, cycle)\n\u001b[1;32m 412\u001b[0m \u001b[39mreturn\u001b[39;00m _default_pprint(obj, \u001b[39mself\u001b[39m, cycle)\n\u001b[1;32m 413\u001b[0m \u001b[39mfinally\u001b[39;00m:\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[39m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m 777\u001b[0m \u001b[39m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output 
\u001b[39m=\u001b[39m \u001b[39mrepr\u001b[39;49m(obj)\n\u001b[1;32m 779\u001b[0m lines \u001b[39m=\u001b[39m output\u001b[39m.\u001b[39msplitlines()\n\u001b[1;32m 780\u001b[0m \u001b[39mwith\u001b[39;00m p\u001b[39m.\u001b[39mgroup():\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 843\u001b[0m separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 844\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 846\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 847\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 848\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 849\u001b[0m 
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 850\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 851\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 852\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 853\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 854\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 855\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m 569\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 
570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m 571\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 363\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m 365\u001b[0m \u001b[39mpass\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m 368\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 369\u001b[0m \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m 372\u001b[0m \u001b[39myield\u001b[39;00m item\n\u001b[1;32m 373\u001b[0m \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n", - "File 
\u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m 452\u001b[0m \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m 453\u001b[0m \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[39mpass\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m 400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m 402\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m 404\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, 
ItemTransform):\n", - "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'" - ] + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=123287\n", + "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=122218\n", + "\tannotations_count=1018861\n", + "subsets\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -216,80 +3243,175 @@ "remap_label_dataset" ] }, + { + "cell_type": "markdown", + "id": "4335c84d", + "metadata": {}, + "source": [ + "### Split datasets\n", + "\n", + "From now on, we are going to give examples of extracting the subset of the imported dataset and splitting this into multiple subsets. 
Datumaro provides two types of splitter; one is the per-sample level random splitter from the given ratio of subsets and the other is the task-specific splitter under consideration of annotation instances.\n", + "\n", + "We first extract the validation dataset and split this into multiple cross-validation datasets." + ] + }, { "cell_type": "code", - "execution_count": 51, - "id": "d0bcd69e", + "execution_count": 20, + "id": "96a8e001", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset\n", - "\tsize=123287\n", - "\tsource_path=coco_dataset\n", - "\tmedia_type=\n", - "\tannotated_items_count=122218\n", - "\tannotations_count=1018861\n", - "subsets\n", - "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", - "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", - "categories\n", - "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", - "\n" - ] + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=5000\n", + "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + 
"\tmedia_type=\n", + "\tannotated_items_count=4952\n", + "\tannotations_count=41866\n", + "subsets\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(dataset)" + "from datumaro.components.dataset import Dataset\n", + "\n", + "val_dataset = dataset.filter('/item[subset=\"val2017\"]') # or Dataset(dataset.get_subset(subsets[0]))\n", + "val_dataset" ] }, { "cell_type": "code", - "execution_count": 45, - "id": "96a8e001", + "execution_count": 21, + "id": "97d25c76", "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "__init__() got an unexpected keyword argument 'regex'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn [45], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m strr \u001b[39m=\u001b[39m 
\u001b[39m'\u001b[39m\u001b[39m|\u001b[39m\u001b[39m\\1\u001b[39;00m\u001b[39m|^image_|\u001b[39m\u001b[39m'\u001b[39m\n\u001b[1;32m 2\u001b[0m renamed_dataset \u001b[39m=\u001b[39m dataset\u001b[39m.\u001b[39mtransform(\u001b[39m\"\u001b[39m\u001b[39mrename\u001b[39m\u001b[39m\"\u001b[39m, regex\u001b[39m=\u001b[39mstrr)\n\u001b[0;32m----> 3\u001b[0m \u001b[39mprint\u001b[39;49m(renamed_dataset)\n\u001b[1;32m 5\u001b[0m \u001b[39m# ids = get_ids(dataset, subsets[0])\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39m# print('val2017', ids)\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:845\u001b[0m, in \u001b[0;36mDataset.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__repr__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mstr\u001b[39m:\n\u001b[1;32m 843\u001b[0m separator \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 844\u001b[0m \u001b[39mreturn\u001b[39;00m (\n\u001b[0;32m--> 845\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mDataset\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 846\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msize=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data)\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 847\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39msource_path=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_source_path\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 848\u001b[0m 
\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mmedia_type=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmedia_type()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 849\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotated_items_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotated_items()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 850\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39mannotations_count=\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_annotations()\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 851\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39msubsets\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 852\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_subset_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 853\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcategories\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 854\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m\\t\u001b[39;00m\u001b[39m{\u001b[39;00mseparator\u001b[39m.\u001b[39mjoin(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_categories_info())\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 855\u001b[0m )\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:570\u001b[0m, in \u001b[0;36mDatasetStorage.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[39mdef\u001b[39;00m 
\u001b[39m__len__\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mint\u001b[39m:\n\u001b[1;32m 569\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49minit_cache()\n\u001b[1;32m 571\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_length\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:364\u001b[0m, in \u001b[0;36mDatasetStorage.init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 362\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39minit_cache\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[1;32m 363\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_cache_initialized():\n\u001b[0;32m--> 364\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache():\n\u001b[1;32m 365\u001b[0m \u001b[39mpass\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:371\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_iter_init_cache\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Iterable[DatasetItem]:\n\u001b[1;32m 368\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 369\u001b[0m \u001b[39m# Can't just return from the method, because it won't add exception handling\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[39m# It covers cases when we save the null error handler in the source\u001b[39;00m\n\u001b[0;32m--> 371\u001b[0m \u001b[39mfor\u001b[39;00m item \u001b[39min\u001b[39;00m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_iter_init_cache_unchecked():\n\u001b[1;32m 372\u001b[0m \u001b[39myield\u001b[39;00m item\n\u001b[1;32m 373\u001b[0m \u001b[39mexcept\u001b[39;00m _ImportFail \u001b[39mas\u001b[39;00m e:\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:451\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 448\u001b[0m transform \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_transforms:\n\u001b[0;32m--> 451\u001b[0m transform \u001b[39m=\u001b[39m _StackedTransform(source, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_transforms)\n\u001b[1;32m 452\u001b[0m \u001b[39mif\u001b[39;00m transform\u001b[39m.\u001b[39mis_local:\n\u001b[1;32m 453\u001b[0m \u001b[39m# An optimized way to find modified items:\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[39m# Transform items inplace and analyze transform outputs\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[39mpass\u001b[39;00m\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:401\u001b[0m, in \u001b[0;36mDatasetStorage._iter_init_cache_unchecked.._StackedTransform.__init__\u001b[0;34m(self, source, transforms)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms: List[Transform] \u001b[39m=\u001b[39m []\n\u001b[1;32m 400\u001b[0m \u001b[39mfor\u001b[39;00m transform \u001b[39min\u001b[39;00m transforms:\n\u001b[0;32m--> 401\u001b[0m source \u001b[39m=\u001b[39m transform[\u001b[39m0\u001b[39;49m](source, \u001b[39m*\u001b[39;49mtransform[\u001b[39m1\u001b[39;49m], \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mtransform[\u001b[39m2\u001b[39;49m])\n\u001b[1;32m 402\u001b[0m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtransforms\u001b[39m.\u001b[39mappend(source)\n\u001b[1;32m 404\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mis_local \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(source, ItemTransform):\n", - "\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'regex'" - ] + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=5000\n", + "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=4952\n", + "\tannotations_count=41866\n", + "subsets\n", + "\tval1: # of items=1000, # of annotated items=993, # of annotations=8365, annotation types=['polygon', 'mask']\n", + "\tval2: # of items=1000, # of annotated items=990, # of annotations=8161, annotation types=['polygon', 'mask']\n", + "\tval3: # of items=1000, # of annotated items=989, # of annotations=8390, annotation types=['polygon', 'mask']\n", + "\tval4: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n", + "\tval5: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# strr = '|\\1|^image_|'\n", - "# renamed_dataset = dataset.transform(\"rename\", regex=strr)\n", - "# print(renamed_dataset)\n", - "\n", - "# ids = get_ids(dataset, subsets[0])\n", - "# print('val2017', ids)" + "splits = ((\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2))\n", + "crossval_dataset = val_dataset.transform(\"random_split\", splits=splits)\n", + "crossval_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "43e95f07", + "metadata": {}, + "source": [ + "Furthermore, Datumaro provides a split function that works at the annotation level instead of the sample level. By running the cell below, we can get well-distributed validation datasets in terms of the number of annotations." ] }, { "cell_type": "code", - "execution_count": 8, - "id": "4ed4a847", + "execution_count": 22, + "id": "f2cee2b8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=5000\n", + "\tsource_path=None\n", + "\tmedia_type=\n", + "\tannotated_items_count=4952\n", + "\tannotations_count=41866\n", + "subsets\n", + "\tval1: # of items=1035, # of annotated items=1035, # of annotations=8374, annotation types=['polygon', 'mask']\n", + "\tval2: # of items=1000, # of annotated items=997, # of annotations=8376, annotation types=['polygon', 'mask']\n", + "\tval3: # of items=1000, # of annotated items=959, # of annotations=8366, annotation types=['polygon', 'mask']\n", + "\tval4: # of items=927, # of annotated items=923, # of annotations=8376, annotation types=['polygon', 'mask']\n", + "\tval5: # of items=1038, # of annotated items=1038, # of annotations=8374, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire 
hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from datumaro.components.visualizer import Visualizer\n", + "import datumaro.plugins.splitter as splitter\n", + "\n", + "task = splitter.SplitTask.segmentation.name\n", + "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n", "\n", - "visualizer = Visualizer(dataset, figsize=(8, 8), alpha=0.7)\n", - "fig = visualizer.vis_gallery(ids[:4], subsets[0], (2, 2))\n", - "fig.show()" + "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n", + "crossval_per_ann_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "274ff8ed", + "metadata": {}, + "source": [ + "Lastly, we can rename the subset as below. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f5acac08", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=5000\n", + "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=4952\n", + "\tannotations_count=41866\n", + "subsets\n", + "\ttest: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n", + "\ttrain: # of items=3000, # of annotated items=2972, # of annotations=24916, annotation types=['polygon', 'mask']\n", + "\tval: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping={\"val1\": \"train\", \"val2\": \"train\", \"val3\": \"train\", \"val4\": \"val\", \"val5\": \"test\"}\n", + "test_dataset = dataset.transform(\"map_subsets\", mapping=mapping)\n", + "test_dataset" ] } ], From c35986cfe7a654569a492d4dcde80cd5b82fb903 Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Fri, 11 Nov 
2022 00:31:33 +0900 Subject: [PATCH 3/8] update changelog --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75a708f225..a5feafcbb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,10 +10,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Support for exclusive of labels with LabelGroup () -- Add jupyter sample introducing how to merge datasets +- Add jupyter samples + - introducing how to merge datasets () + - introducing how to visualize dataset + () - introducing how to filter dataset () + - introducing how to transform dataset + () - Add Visualization Python API - Bbox () From 0d3a4dc2cacd4a79473912af792aae1dce99e58b Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Fri, 11 Nov 2022 00:36:38 +0900 Subject: [PATCH 4/8] update notebook --- notebooks/05_transform.ipynb | 84 ++++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb index a0d465d8ab..6fae04bbda 100644 --- a/notebooks/05_transform.ipynb +++ b/notebooks/05_transform.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "id": "da198c67", "metadata": {}, "outputs": [ @@ -37,12 +37,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 
'/home/wonju/data/datasets/coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File '/home/wonju/data/datasets/coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" + "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" ] }, { @@ -57,7 +57,7 @@ "text/plain": [ "Dataset\n", "\tsize=123287\n", - "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=122218\n", "\tannotations_count=1018861\n", @@ -68,7 +68,7 @@ "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 14, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 2, "id": "d38cfc9b", "metadata": { "scrolled": true @@ -1118,7 +1118,7 @@ " ...]" ] }, - "execution_count": 15, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -1148,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 3, "id": "51bf3388", "metadata": {}, "outputs": [ @@ -2158,7 +2158,7 @@ " ...]" ] }, - "execution_count": 16, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -2178,7 +2178,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 4, "id": "fb608396", "metadata": { "scrolled": true @@ -3190,7 +3190,7 @@ " ...]" ] }, - "execution_count": 17, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -3212,7 +3212,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 5, "id": "a2515d03", "metadata": {}, "outputs": [ @@ -3221,7 +3221,7 @@ "text/plain": [ "Dataset\n", "\tsize=123287\n", - "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=122218\n", "\tannotations_count=1018861\n", @@ -3232,7 +3232,7 @@ "\tlabel: ['person', 'bicycle', 'car', 
'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 18, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -3257,7 +3257,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "id": "96a8e001", "metadata": {}, "outputs": [ @@ -3266,7 +3266,7 @@ "text/plain": [ "Dataset\n", "\tsize=5000\n", - "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", @@ -3276,7 +3276,7 @@ "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 
'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 20, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -3290,7 +3290,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "id": "97d25c76", "metadata": {}, "outputs": [ @@ -3299,21 +3299,21 @@ "text/plain": [ "Dataset\n", "\tsize=5000\n", - "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\tval1: # of items=1000, # of annotated items=993, # of annotations=8365, annotation types=['polygon', 'mask']\n", - "\tval2: # of items=1000, # of annotated items=990, # of annotations=8161, annotation types=['polygon', 'mask']\n", - "\tval3: # of items=1000, # of annotated items=989, # of annotations=8390, annotation types=['polygon', 'mask']\n", - "\tval4: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n", - "\tval5: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n", + "\tval1: # of items=1000, # of annotated items=993, # of annotations=8237, annotation types=['polygon', 'mask']\n", + "\tval2: # of items=1000, # of annotated items=989, # of annotations=8542, annotation types=['polygon', 'mask']\n", + "\tval3: # of items=1000, # of annotated items=986, # of annotations=8237, annotation types=['polygon', 'mask']\n", + "\tval4: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n", + "\tval5: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking 
meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 21, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -3334,7 +3334,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 8, "id": "f2cee2b8", "metadata": {}, "outputs": [ @@ -3348,16 +3348,16 @@ "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\tval1: # of items=1035, # of annotated items=1035, # of annotations=8374, annotation types=['polygon', 'mask']\n", - "\tval2: # of items=1000, # of annotated items=997, # of annotations=8376, annotation types=['polygon', 'mask']\n", - "\tval3: # of items=1000, # of annotated items=959, # of annotations=8366, annotation types=['polygon', 'mask']\n", - "\tval4: # of items=927, # of annotated items=923, # of annotations=8376, annotation types=['polygon', 'mask']\n", - "\tval5: # of items=1038, # of annotated items=1038, # of annotations=8374, annotation types=['polygon', 'mask']\n", + "\tval1: # of items=1029, # of annotated items=1029, # of annotations=8381, annotation types=['polygon', 'mask']\n", + "\tval2: # of items=1000, # of annotated items=975, # of annotations=8374, annotation types=['polygon', 'mask']\n", + "\tval3: # of items=1009, # of annotated items=1009, # of annotations=8376, annotation 
types=['polygon', 'mask']\n", + "\tval4: # of items=966, # of annotated items=943, # of annotations=8374, annotation types=['polygon', 'mask']\n", + "\tval5: # of items=996, # of annotated items=996, # of annotations=8361, annotation types=['polygon', 'mask']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 22, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -3382,7 +3382,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 9, "id": "f5acac08", "metadata": {}, "outputs": [ @@ -3391,19 +3391,19 @@ "text/plain": [ "Dataset\n", "\tsize=5000\n", - "\tsource_path=/home/wonju/data/datasets/coco_dataset\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\ttest: # of items=1000, # of annotated items=991, # of annotations=9024, annotation types=['polygon', 'mask']\n", - "\ttrain: # of items=3000, # of annotated items=2972, # of annotations=24916, annotation types=['polygon', 'mask']\n", - "\tval: # of items=1000, # of annotated items=989, # of annotations=7926, annotation types=['polygon', 'mask']\n", + "\ttest: 
# of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n", + "\ttrain: # of items=3000, # of annotated items=2968, # of annotations=25016, annotation types=['polygon', 'mask']\n", + "\tval: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 23, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } From f01b55fd2e462c5b6aca3e76c2075974c0350ef8 Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Fri, 11 Nov 2022 23:12:15 +0900 Subject: [PATCH 5/8] pylint resolve --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5feafcbb9..69e3b48672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Support for exclusive of labels with LabelGroup () -- Add jupyter samples +- Add jupyter samples - introducing how to merge datasets () - introducing how to visualize dataset From f958bdfe89f2e107bcfff9eb44989fa94f69b925 Mon Sep 17 
00:00:00 2001 From: Wonju Lee Date: Tue, 15 Nov 2022 18:40:38 +0900 Subject: [PATCH 6/8] correct a notebook for transform api --- notebooks/05_transform.ipynb | 62 +++++++++++++----------------------- 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb index 6fae04bbda..b3569a9a62 100644 --- a/notebooks/05_transform.ipynb +++ b/notebooks/05_transform.ipynb @@ -37,40 +37,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", - "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" + "2022-11-15 18:27:33.901721: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", + "WARNING:root:Failed to import module 'datumaro.plugins.openvino_plugin.launcher': libpython3.8.so.1.0: cannot open shared object file: No such file or directory\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Representation for sample COCO dataset\n" + "ename": "Exception", + "evalue": "Failed to find 'coco' dataset at 
'coco_dataset'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [1], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatumaro\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdm\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m dataset \u001b[39m=\u001b[39m dm\u001b[39m.\u001b[39;49mDataset\u001b[39m.\u001b[39;49mimport_from(\u001b[39m'\u001b[39;49m\u001b[39mcoco_dataset\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcoco_instances\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 10\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mRepresentation for sample COCO dataset\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 11\u001b[0m dataset\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:1238\u001b[0m, in \u001b[0;36mDataset.import_from\u001b[0;34m(cls, path, format, env, progress_reporter, error_policy, **kwargs)\u001b[0m\n\u001b[1;32m 1236\u001b[0m importer \u001b[39m=\u001b[39m env\u001b[39m.\u001b[39mmake_importer(\u001b[39mformat\u001b[39m)\n\u001b[1;32m 1237\u001b[0m \u001b[39mwith\u001b[39;00m logging_disabled(log\u001b[39m.\u001b[39mINFO):\n\u001b[0;32m-> 1238\u001b[0m detected_sources \u001b[39m=\u001b[39m importer(path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1239\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mformat\u001b[39m \u001b[39min\u001b[39;00m env\u001b[39m.\u001b[39mextractors:\n\u001b[1;32m 1240\u001b[0m detected_sources \u001b[39m=\u001b[39m [{\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: path, 
\u001b[39m\"\u001b[39m\u001b[39mformat\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mformat\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39moptions\u001b[39m\u001b[39m\"\u001b[39m: kwargs}]\n", + "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/plugins/coco_format/importer.py:68\u001b[0m, in \u001b[0;36mCocoImporter.__call__\u001b[0;34m(self, path, **extra_params)\u001b[0m\n\u001b[1;32m 65\u001b[0m subsets \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfind_sources(path)\n\u001b[1;32m 67\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(subsets) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m---> 68\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mException\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mFailed to find \u001b[39m\u001b[39m'\u001b[39m\u001b[39mcoco\u001b[39m\u001b[39m'\u001b[39m\u001b[39m dataset at \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m path)\n\u001b[1;32m 70\u001b[0m \u001b[39m# TODO: should be removed when proper label merging is implemented\u001b[39;00m\n\u001b[1;32m 71\u001b[0m conflicting_types \u001b[39m=\u001b[39m {\n\u001b[1;32m 72\u001b[0m CocoTask\u001b[39m.\u001b[39minstances,\n\u001b[1;32m 73\u001b[0m CocoTask\u001b[39m.\u001b[39mperson_keypoints,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 76\u001b[0m CocoTask\u001b[39m.\u001b[39mstuff,\n\u001b[1;32m 77\u001b[0m }\n", + "\u001b[0;31mException\u001b[0m: Failed to find 'coco' dataset at 'coco_dataset'" ] - }, - { - "data": { - "text/plain": [ - "Dataset\n", - "\tsize=123287\n", - "\tsource_path=coco_dataset\n", - "\tmedia_type=\n", - "\tannotated_items_count=122218\n", - "\tannotations_count=1018861\n", - "subsets\n", - "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", - "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, 
annotation types=['polygon', 'mask']\n", - "categories\n", - "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -78,7 +60,6 @@ "#\n", "# SPDX-License-Identifier: MIT\n", "\n", - "import os\n", "import datumaro as dm\n", "\n", "dataset = dm.Dataset.import_from('coco_dataset', format='coco_instances')\n", @@ -99,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "d38cfc9b", "metadata": { "scrolled": true @@ -1148,7 +1129,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "51bf3388", "metadata": {}, "outputs": [ @@ -2178,7 +2159,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "fb608396", "metadata": { "scrolled": true @@ -3212,7 +3193,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "a2515d03", "metadata": {}, "outputs": [ @@ -3257,7 +3238,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "96a8e001", "metadata": {}, "outputs": [ @@ -3290,7 +3271,7 @@ }, { 
"cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "97d25c76", "metadata": {}, "outputs": [ @@ -3329,12 +3310,12 @@ "id": "43e95f07", "metadata": {}, "source": [ - "Furthermore, Datumaro provides the split function in the viewpoint of annotation instead of sample throguh. By performing below, we can get the well-distributed validation datasets in terms of the number of annotations." + "Furthermore, Datumaro provides the split function in the viewpoint of annotation instead of sample through. By performing below, we can get the well-distributed validation datasets in terms of the number of annotations." ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "f2cee2b8", "metadata": {}, "outputs": [ @@ -3368,6 +3349,7 @@ "task = splitter.SplitTask.segmentation.name\n", "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n", "\n", + "crossval_per_ann_dataset = val_dataset.transform(\"split\", task=task, splits=splits)\n", "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n", "crossval_per_ann_dataset" ] @@ -3382,7 +3364,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "f5acac08", "metadata": {}, "outputs": [ From 5e58ea450eabdf3f1fb1be3ddbcce40b6b2880af Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Tue, 15 Nov 2022 18:42:21 +0900 Subject: [PATCH 7/8] correct a notebook for transform api --- notebooks/05_transform.ipynb | 111 ++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 47 deletions(-) diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb index b3569a9a62..fc6fdd9f72 100644 --- a/notebooks/05_transform.ipynb +++ b/notebooks/05_transform.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "da198c67", "metadata": {}, "outputs": [ @@ -37,22 +37,40 @@ "name": "stderr", "output_type": "stream", "text": [ - 
"2022-11-15 18:27:33.901721: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "WARNING:root:Failed to import module 'datumaro.plugins.openvino_plugin.launcher': libpython3.8.so.1.0: cannot open shared object file: No such file or directory\n" + "WARNING:root:File 'coco_dataset/annotations/panoptic_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/panoptic_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_val2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/person_keypoints_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n", + "WARNING:root:File 'coco_dataset/annotations/captions_train2017.json' was skipped, could't match this file with any of these tasks: coco_instances\n" ] }, { - "ename": "Exception", - "evalue": "Failed to find 'coco' dataset at 'coco_dataset'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn [1], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatumaro\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdm\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m dataset \u001b[39m=\u001b[39m 
dm\u001b[39m.\u001b[39;49mDataset\u001b[39m.\u001b[39;49mimport_from(\u001b[39m'\u001b[39;49m\u001b[39mcoco_dataset\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39mcoco_instances\u001b[39;49m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m 10\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mRepresentation for sample COCO dataset\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m 11\u001b[0m dataset\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/components/dataset.py:1238\u001b[0m, in \u001b[0;36mDataset.import_from\u001b[0;34m(cls, path, format, env, progress_reporter, error_policy, **kwargs)\u001b[0m\n\u001b[1;32m 1236\u001b[0m importer \u001b[39m=\u001b[39m env\u001b[39m.\u001b[39mmake_importer(\u001b[39mformat\u001b[39m)\n\u001b[1;32m 1237\u001b[0m \u001b[39mwith\u001b[39;00m logging_disabled(log\u001b[39m.\u001b[39mINFO):\n\u001b[0;32m-> 1238\u001b[0m detected_sources \u001b[39m=\u001b[39m importer(path, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 1239\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mformat\u001b[39m \u001b[39min\u001b[39;00m env\u001b[39m.\u001b[39mextractors:\n\u001b[1;32m 1240\u001b[0m detected_sources \u001b[39m=\u001b[39m [{\u001b[39m\"\u001b[39m\u001b[39murl\u001b[39m\u001b[39m\"\u001b[39m: path, \u001b[39m\"\u001b[39m\u001b[39mformat\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mformat\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39moptions\u001b[39m\u001b[39m\"\u001b[39m: kwargs}]\n", - "File \u001b[0;32m~/anaconda3/envs/datum/lib/python3.8/site-packages/datumaro-0.3.1-py3.8.egg/datumaro/plugins/coco_format/importer.py:68\u001b[0m, in \u001b[0;36mCocoImporter.__call__\u001b[0;34m(self, path, **extra_params)\u001b[0m\n\u001b[1;32m 65\u001b[0m subsets \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfind_sources(path)\n\u001b[1;32m 67\u001b[0m 
\u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(subsets) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m---> 68\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mException\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mFailed to find \u001b[39m\u001b[39m'\u001b[39m\u001b[39mcoco\u001b[39m\u001b[39m'\u001b[39m\u001b[39m dataset at \u001b[39m\u001b[39m'\u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m path)\n\u001b[1;32m 70\u001b[0m \u001b[39m# TODO: should be removed when proper label merging is implemented\u001b[39;00m\n\u001b[1;32m 71\u001b[0m conflicting_types \u001b[39m=\u001b[39m {\n\u001b[1;32m 72\u001b[0m CocoTask\u001b[39m.\u001b[39minstances,\n\u001b[1;32m 73\u001b[0m CocoTask\u001b[39m.\u001b[39mperson_keypoints,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 76\u001b[0m CocoTask\u001b[39m.\u001b[39mstuff,\n\u001b[1;32m 77\u001b[0m }\n", - "\u001b[0;31mException\u001b[0m: Failed to find 'coco' dataset at 'coco_dataset'" + "name": "stdout", + "output_type": "stream", + "text": [ + "Representation for sample COCO dataset\n" ] + }, + { + "data": { + "text/plain": [ + "Dataset\n", + "\tsize=123287\n", + "\tsource_path=coco_dataset\n", + "\tmedia_type=\n", + "\tannotated_items_count=122218\n", + "\tannotations_count=1018861\n", + "subsets\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['mask', 'polygon']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n", + "categories\n", + "\tlabel: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -80,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "d38cfc9b", "metadata": { "scrolled": true @@ -1099,7 +1117,7 @@ " ...]" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -1129,7 +1147,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "51bf3388", "metadata": {}, "outputs": [ @@ -2139,7 +2157,7 @@ " ...]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -2159,7 +2177,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "fb608396", "metadata": { "scrolled": true @@ -3171,7 +3189,7 @@ " ...]" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -3193,7 +3211,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "a2515d03", "metadata": {}, "outputs": [ @@ -3207,13 +3225,13 @@ "\tannotated_items_count=122218\n", "\tannotations_count=1018861\n", "subsets\n", - "\ttrain2017: # of items=118287, # of annotated items=117266, # of annotations=976995, annotation types=['polygon', 'mask']\n", - "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "\ttrain2017: # of items=118287, # of annotated items=117266, # of 
annotations=976995, annotation types=['mask', 'polygon']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -3238,7 +3256,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "96a8e001", "metadata": {}, "outputs": [ @@ -3252,12 +3270,12 @@ "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['polygon', 'mask']\n", + "\tval2017: # of items=5000, # of annotated items=4952, # of annotations=41866, annotation types=['mask', 'polygon']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 
'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -3271,7 +3289,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "97d25c76", "metadata": {}, "outputs": [ @@ -3285,16 +3303,16 @@ "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\tval1: # of items=1000, # of annotated items=993, # of annotations=8237, annotation types=['polygon', 'mask']\n", - "\tval2: # of items=1000, # of annotated items=989, # of annotations=8542, annotation types=['polygon', 'mask']\n", - "\tval3: # of items=1000, # of annotated items=986, # of annotations=8237, annotation types=['polygon', 'mask']\n", - "\tval4: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n", - "\tval5: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n", + "\tval1: # of items=1000, # of annotated items=991, # of annotations=8344, annotation types=['mask', 'polygon']\n", + "\tval2: # of items=1000, # of annotated items=991, # of annotations=7646, annotation types=['mask', 'polygon']\n", + "\tval3: # of items=1000, # of annotated items=993, # of annotations=8625, annotation types=['mask', 'polygon']\n", + "\tval4: # of items=1000, # of annotated items=986, # of annotations=8752, annotation types=['mask', 'polygon']\n", + "\tval5: # of items=1000, # of annotated items=991, # of annotations=8499, 
annotation types=['mask', 'polygon']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -3315,7 +3333,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "f2cee2b8", "metadata": {}, "outputs": [ @@ -3324,21 +3342,21 @@ "text/plain": [ "Dataset\n", "\tsize=5000\n", - "\tsource_path=None\n", + "\tsource_path=coco_dataset\n", "\tmedia_type=\n", "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\tval1: # of items=1029, # of annotated items=1029, # of annotations=8381, annotation types=['polygon', 'mask']\n", - "\tval2: # of items=1000, # of annotated items=975, # of annotations=8374, annotation types=['polygon', 'mask']\n", - "\tval3: # of items=1009, # of annotated items=1009, # of annotations=8376, annotation types=['polygon', 'mask']\n", - "\tval4: # of items=966, # of annotated items=943, # of annotations=8374, annotation types=['polygon', 'mask']\n", - "\tval5: # of items=996, # of annotated items=996, # of annotations=8361, annotation types=['polygon', 'mask']\n", + "\tval1: # of items=1000, # of 
annotated items=1000, # of annotations=8368, annotation types=['mask', 'polygon']\n", + "\tval2: # of items=967, # of annotated items=919, # of annotations=8374, annotation types=['mask', 'polygon']\n", + "\tval3: # of items=1032, # of annotated items=1032, # of annotations=8374, annotation types=['mask', 'polygon']\n", + "\tval4: # of items=987, # of annotated items=987, # of annotations=8376, annotation types=['mask', 'polygon']\n", + "\tval5: # of items=1014, # of annotated items=1014, # of annotations=8374, annotation types=['mask', 'polygon']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -3350,7 +3368,6 @@ "splits = [(\"val1\", 0.2), (\"val2\", 0.2), (\"val3\", 0.2), (\"val4\", 0.2), (\"val5\", 0.2)]\n", "\n", "crossval_per_ann_dataset = val_dataset.transform(\"split\", task=task, splits=splits)\n", - "crossval_per_ann_dataset = Dataset(splitter.Split(val_dataset, task, splits))\n", "crossval_per_ann_dataset" ] }, @@ -3364,7 +3381,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "f5acac08", "metadata": {}, 
"outputs": [ @@ -3378,14 +3395,14 @@ "\tannotated_items_count=4952\n", "\tannotations_count=41866\n", "subsets\n", - "\ttest: # of items=1000, # of annotated items=991, # of annotations=8404, annotation types=['polygon', 'mask']\n", - "\ttrain: # of items=3000, # of annotated items=2968, # of annotations=25016, annotation types=['polygon', 'mask']\n", - "\tval: # of items=1000, # of annotated items=993, # of annotations=8446, annotation types=['polygon', 'mask']\n", + "\ttest: # of items=1014, # of annotated items=1014, # of annotations=8374, annotation types=['mask', 'polygon']\n", + "\ttrain: # of items=2999, # of annotated items=2951, # of annotations=25116, annotation types=['mask', 'polygon']\n", + "\tval: # of items=987, # of annotated items=987, # of annotations=8376, annotation types=['mask', 'polygon']\n", "categories\n", "\tlabel: ['person', 'bicycle', 'car', 'airplane', 'train', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } From 74560f27195e31c1c05914025c4904d45217ff26 Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Tue, 15 Nov 2022 18:43:56 +0900 Subject: [PATCH 8/8] correct typo --- notebooks/05_transform.ipynb | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/05_transform.ipynb b/notebooks/05_transform.ipynb index fc6fdd9f72..439e2b5854 100644 --- a/notebooks/05_transform.ipynb +++ b/notebooks/05_transform.ipynb @@ -3328,7 +3328,7 @@ "id": "43e95f07", "metadata": {}, "source": [ - "Furthermore, Datumaro provides the split function in the viewpoint of annotation instead of sample through. By performing below, we can get the well-distributed validation datasets in terms of the number of annotations." + "Furthermore, Datumaro provides a split function that works from the viewpoint of annotations rather than samples, through a task-specific splitter. By running the cell below, we can get validation subsets that are well-distributed in terms of the number of annotations." ] }, {