diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb index 4bcbfcba6b..f679f997a3 100644 --- a/demo/MMSegmentation_Tutorial.ipynb +++ b/demo/MMSegmentation_Tutorial.ipynb @@ -7,7 +7,7 @@ "id": "view-in-github" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -68,8 +68,12 @@ "source": [ "# Install PyTorch\n", "!conda install pytorch=1.10.0 torchvision cudatoolkit=11.1 -c pytorch\n", + "# Install mim\n", + "!pip install -U openmim\n", + "# Install mmengine\n", + "!mim install mmengine\n", "# Install MMCV\n", - "!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10/index.html" + "!mim install 'mmcv >= 2.0.0rc1'" ] }, { @@ -85,7 +89,7 @@ "outputs": [], "source": [ "!rm -rf mmsegmentation\n", - "!git clone https://github.com/open-mmlab/mmsegmentation.git \n", + "!git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git \n", "%cd mmsegmentation\n", "!pip install -e ." ] @@ -111,110 +115,15 @@ "print(mmseg.__version__)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "eUcuC3dUv32I" - }, - "source": [ - "## Run Inference with MMSeg trained weight" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "2hd41IGaiNet", - "outputId": "b7b2aafc-edf2-43e4-ea43-0b5dd0aa4b4a" - }, - "outputs": [], - "source": [ - "!mkdir checkpoints\n", - "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H8Fxg8i-wHJE" - }, - "outputs": [], - "source": [ - "from mmseg.apis import inference_model, init_model, show_result_pyplot\n", - "from mmseg.utils import get_palette" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "umk8sJ0Xuace" - }, - "outputs": [], - "source": [ - "config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n", - "checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nWlQFuTgudxu", - "outputId": "5e45f4f6-5bcf-4d04-bb9c-0428ee84a576" - }, - "outputs": [], - "source": [ - "# build the model from a config file and a checkpoint file\n", - "model = init_model(config_file, checkpoint_file, device='cuda:0')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "izFv6pSRujk9" - }, - "outputs": [], - "source": [ - "# test a single image\n", - "img = 'demo/demo.png'\n", - "result = inference_model(model, img)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 504 - }, - "id": "bDcs9udgunQK", - "outputId": "7c55f713-4085-47fd-fa06-720a321d0795" - }, - "outputs": [], - "source": [ - "# show the results\n", - "show_result_pyplot(model, img, result, get_palette('cityscapes'))" - ] - }, { "cell_type": "markdown", "metadata": { "id": "Ta51clKX4cwM" }, "source": [ - "## Train a semantic segmentation model on a new dataset\n", + "## Finetune a semantic segmentation model on a new dataset\n", "\n", - "To train on a customized dataset, the following steps are necessary. \n", + "To finetune on a customized dataset, the following steps are necessary. \n", "1. 
Add a new dataset class. \n", "2. Create a config file accordingly. \n", "3. Perform training and evaluation. " @@ -268,8 +177,10 @@ "source": [ "# Let's take a look at the dataset\n", "import mmcv\n", + "import mmengine\n", "import matplotlib.pyplot as plt\n", "\n", + "\n", "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n", "plt.figure(figsize=(8, 6))\n", "plt.imshow(mmcv.bgr2rgb(img))\n", @@ -293,18 +204,30 @@ }, "outputs": [], "source": [ - "import os.path as osp\n", - "import numpy as np\n", - "from PIL import Image\n", - "# convert dataset annotation to semantic segmentation map\n", + "# define dataset root and directory for images and annotations\n", "data_root = 'iccv09Data'\n", "img_dir = 'images'\n", "ann_dir = 'labels'\n", - "# define class and plaette for better visualization\n", + "# define class and palette for better visualization\n", "classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n", "palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n", - " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]\n", - "for file in mmcv.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n", + " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WnGZfribFHCx" + }, + "outputs": [], + "source": [ + "import os.path as osp\n", + "import numpy as np\n", + "from PIL import Image\n", + "\n", + "# convert dataset annotation to semantic segmentation map\n", + "for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n", " seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n", " seg_img = Image.fromarray(seg_map).convert('P')\n", " seg_img.putpalette(np.array(palette, dtype=np.uint8))\n", @@ -351,8 +274,8 @@ "source": [ "# split train/val set randomly\n", "split_dir = 'splits'\n", - "mmcv.mkdir_or_exist(osp.join(data_root, split_dir))\n", - "filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(\n", + "mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n", + "filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n", " osp.join(data_root, ann_dir), suffix='.png')]\n", "with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n", " # select first 4/5 as train set\n", @@ -380,18 +303,15 @@ }, "outputs": [], "source": [ - "from mmseg.datasets.builder import DATASETS\n", - "from mmseg.datasets.custom import BaseSegDataset\n", + "from mmseg.registry import DATASETS\n", + "from mmseg.datasets import BaseSegDataset\n", + "\n", "\n", "@DATASETS.register_module()\n", "class StanfordBackgroundDataset(BaseSegDataset):\n", - " CLASSES = classes\n", - " PALETTE = palette\n", - " def __init__(self, split, **kwargs):\n", - " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', \n", - " split=split, **kwargs)\n", - " assert osp.exists(self.img_dir) and self.split is not None\n", - "\n", + " METAINFO = dict(classes = classes, palette = palette)\n", + " def __init__(self, **kwargs):\n", + " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n", " " ] }, @@ -405,6 +325,16 @@ "In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download config and checkpoint files\n", + "!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ." + ] + }, { "cell_type": "code", "execution_count": null, @@ -413,8 +343,9 @@ }, "outputs": [], "source": [ - "from mmcv import Config\n", - "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')" + "from mmengine import Config\n", + "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n", + "print(f'Config:\\n{cfg.pretty_text}')" ] }, { @@ -438,10 +369,10 @@ }, "outputs": [], "source": [ - "from mmseg.apis import set_random_seed\n", - "\n", "# Since we use only one GPU, BN is used instead of SyncBN\n", "cfg.norm_cfg = dict(type='BN', requires_grad=True)\n", + "cfg.crop_size = (256, 256)\n", + "cfg.model.data_preprocessor.size = cfg.crop_size\n", "cfg.model.backbone.norm_cfg = cfg.norm_cfg\n", "cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n", "cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n", @@ -453,79 +384,55 @@ "cfg.dataset_type = 'StanfordBackgroundDataset'\n", "cfg.data_root = data_root\n", "\n", - "cfg.data.samples_per_gpu = 8\n", - "cfg.data.workers_per_gpu=8\n", + "cfg.train_dataloader.batch_size = 8\n", "\n", - "cfg.img_norm_cfg = dict(\n", - " mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n", - "cfg.crop_size = (256, 256)\n", "cfg.train_pipeline = [\n", " dict(type='LoadImageFromFile'),\n", " dict(type='LoadAnnotations'),\n", - " dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),\n", + " dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n", " dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n", - " dict(type='RandomFlip', flip_ratio=0.5),\n", - " dict(type='PhotoMetricDistortion'),\n", - " dict(type='Normalize', **cfg.img_norm_cfg),\n", - " dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),\n", - " dict(type='DefaultFormatBundle'),\n", - " dict(type='Collect', keys=['img', 'gt_semantic_seg']),\n", + " dict(type='RandomFlip', prob=0.5),\n", + " dict(type='PackSegInputs')\n", "]\n", "\n", "cfg.test_pipeline = [\n", " dict(type='LoadImageFromFile'),\n", - " dict(\n", - " type='MultiScaleFlipAug',\n", - " img_scale=(320, 240),\n", - " # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],\n", - " flip=False,\n", - " transforms=[\n", - " dict(type='Resize', keep_ratio=True),\n", - " dict(type='RandomFlip'),\n", - " dict(type='Normalize', **cfg.img_norm_cfg),\n", - " dict(type='ImageToTensor', keys=['img']),\n", - " dict(type='Collect', keys=['img']),\n", - " ])\n", + " dict(type='Resize', scale=(320, 240), keep_ratio=True),\n", + " # add loading annotation after ``Resize`` because ground truth\n", + " # does not need to do resize data transform\n", + " dict(type='LoadAnnotations'),\n", + " dict(type='PackSegInputs')\n", "]\n", "\n", "\n", - "cfg.data.train.type = cfg.dataset_type\n", - "cfg.data.train.data_root = cfg.data_root\n", - "cfg.data.train.img_dir = img_dir\n", - "cfg.data.train.ann_dir = ann_dir\n", - "cfg.data.train.pipeline = cfg.train_pipeline\n", - "cfg.data.train.split = 'splits/train.txt'\n", + "cfg.train_dataloader.dataset.type = cfg.dataset_type\n", + "cfg.train_dataloader.dataset.data_root = cfg.data_root\n", + "cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n", + "cfg.train_dataloader.dataset.pipeline = 
cfg.train_pipeline\n", + "cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n", + "\n", + "cfg.val_dataloader.dataset.type = cfg.dataset_type\n", + "cfg.val_dataloader.dataset.data_root = cfg.data_root\n", + "cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n", + "cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n", + "cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n", "\n", - "cfg.data.val.type = cfg.dataset_type\n", - "cfg.data.val.data_root = cfg.data_root\n", - "cfg.data.val.img_dir = img_dir\n", - "cfg.data.val.ann_dir = ann_dir\n", - "cfg.data.val.pipeline = cfg.test_pipeline\n", - "cfg.data.val.split = 'splits/val.txt'\n", + "cfg.test_dataloader = cfg.val_dataloader\n", "\n", - "cfg.data.test.type = cfg.dataset_type\n", - "cfg.data.test.data_root = cfg.data_root\n", - "cfg.data.test.img_dir = img_dir\n", - "cfg.data.test.ann_dir = ann_dir\n", - "cfg.data.test.pipeline = cfg.test_pipeline\n", - "cfg.data.test.split = 'splits/val.txt'\n", "\n", - "# We can still use the pre-trained Mask RCNN model though we do not need to\n", - "# use the mask branch\n", - "cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n", + "# Load the pretrained weights\n", + "cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n", "\n", "# Set up working dir to save files and logs.\n", "cfg.work_dir = './work_dirs/tutorial'\n", "\n", - "cfg.runner.max_iters = 200\n", - "cfg.log_config.interval = 10\n", - "cfg.evaluation.interval = 200\n", - "cfg.checkpoint_config.interval = 200\n", + "cfg.train_cfg.max_iters = 200\n", + "cfg.train_cfg.val_interval = 200\n", + "cfg.default_hooks.logger.interval = 10\n", + "cfg.default_hooks.checkpoint.interval = 200\n", "\n", - "# Set seed to facitate reproducing the result\n", - "cfg.seed = 0\n", - "set_random_seed(0, deterministic=False)\n", - "cfg.gpu_ids = range(1)\n", + "# Set seed to facilitate reproducing the result\n", + "cfg['randomness'] = dict(seed=0)\n", "\n", "# Let's have a look at the final config used for training\n", "print(f'Config:\\n{cfg.pretty_text}')" @@ -552,23 +459,23 @@ }, "outputs": [], "source": [ - "from mmseg.datasets import build_dataset\n", - "from mmseg.models import build_segmentor\n", - "from mmseg.apis import train_segmentor\n", - "\n", - "\n", - "# Build the dataset\n", - "datasets = [build_dataset(cfg.data.train)]\n", + "from mmengine.runner import Runner\n", + "from mmseg.utils import register_all_modules\n", "\n", - "# Build the detector\n", - "model = build_segmentor(cfg.model)\n", - "# Add an attribute for visualization convenience\n", - "model.CLASSES = datasets[0].CLASSES\n", - "\n", - "# Create work_dir\n", - "mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n", - "train_segmentor(model, datasets, cfg, distributed=False, validate=True, \n", - " meta=dict())" + "# register all modules in mmseg into the registries\n", + "# do not init the default scope here because it will be init in the runner\n", + "register_all_modules(init_default_scope=False)\n", + "runner = Runner.from_cfg(cfg)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# start training\n", + "runner.train()" ] }, { @@ -593,20 +500,17 @@ }, "outputs": [], "source": [ - "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n", + "from mmseg.apis import inference_model, show_result_pyplot\n", "\n", - "model.cfg = cfg\n", + "model=runner.model\n", + "model.cfg=cfg\n", + "\n", 
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n", "result = inference_model(model, img)\n", "plt.figure(figsize=(8, 6))\n", - "show_result_pyplot(model, img, result, palette)" + "vis_result = show_result_pyplot(model, img, result, palette)\n", + "plt.imshow(mmcv.bgr2rgb(vis_result))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -618,7 +522,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3.7.13 ('pt1.12')", "language": "python", "name": "python3" }, @@ -632,7 +536,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.0" + "version": "3.7.13" }, "pycharm": { "stem_cell": { @@ -642,6 +546,11 @@ }, "source": [] } + }, + "vscode": { + "interpreter": { + "hash": "ffdb7915c29738c259ec7ee5d0d1b9253c264f1fd267d45dd77f1a420396c120" + } } }, "nbformat": 4, diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb index b557e9b2af..f05a947483 100644 --- a/demo/inference_demo.ipynb +++ b/demo/inference_demo.ipynb @@ -21,6 +21,8 @@ "outputs": [], "source": [ "import torch\n", + "import mmcv\n", + "import matplotlib.pyplot as plt\n", "from mmengine.model.utils import revert_sync_batchnorm\n", "from mmseg.apis import init_model, inference_model, show_result_pyplot\n", "from mmseg.utils import register_all_modules\n", @@ -71,7 +73,8 @@ "outputs": [], "source": [ "# show the results\n", - "show_result_pyplot(model, img, result)" + "vis_result = show_result_pyplot(model, img, result)\n", + "plt.imshow(mmcv.bgr2rgb(vis_result))" ] }, { diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py index 8d718c4724..552ea7a832 100644 --- a/mmseg/apis/inference.py +++ b/mmseg/apis/inference.py @@ -102,7 +102,7 @@ def _preprare_data(imgs: ImageType, model: BaseSegmentor): is_batch = False if isinstance(imgs[0], np.ndarray): - cfg.test_pipeline[0].type = 'LoadImageFromNDArray' + cfg.test_pipeline[0]['type'] = 'LoadImageFromNDArray' # TODO: Consider using the singleton pattern to avoid building # a pipeline for each inference