diff --git a/demo/MMSegmentation_Tutorial.ipynb b/demo/MMSegmentation_Tutorial.ipynb
index 4bcbfcba6b..f679f997a3 100644
--- a/demo/MMSegmentation_Tutorial.ipynb
+++ b/demo/MMSegmentation_Tutorial.ipynb
@@ -7,7 +7,7 @@
"id": "view-in-github"
},
"source": [
- ""
+ ""
]
},
{
@@ -68,8 +68,12 @@
"source": [
"# Install PyTorch\n",
"!conda install pytorch=1.10.0 torchvision cudatoolkit=11.1 -c pytorch\n",
+ "# Install mim\n",
+ "!pip install -U openmim\n",
+ "# Install mmengine\n",
+ "!mim install mmengine\n",
"# Install MMCV\n",
- "!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10/index.html"
+ "!mim install 'mmcv >= 2.0.0rc1'"
]
},
{
@@ -85,7 +89,7 @@
"outputs": [],
"source": [
"!rm -rf mmsegmentation\n",
- "!git clone https://github.com/open-mmlab/mmsegmentation.git \n",
+ "!git clone -b dev-1.x https://github.com/open-mmlab/mmsegmentation.git\n",
"%cd mmsegmentation\n",
"!pip install -e ."
]
@@ -111,110 +115,15 @@
"print(mmseg.__version__)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "eUcuC3dUv32I"
- },
- "source": [
- "## Run Inference with MMSeg trained weight"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "2hd41IGaiNet",
- "outputId": "b7b2aafc-edf2-43e4-ea43-0b5dd0aa4b4a"
- },
- "outputs": [],
- "source": [
- "!mkdir checkpoints\n",
- "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "H8Fxg8i-wHJE"
- },
- "outputs": [],
- "source": [
- "from mmseg.apis import inference_model, init_model, show_result_pyplot\n",
- "from mmseg.utils import get_palette"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "umk8sJ0Xuace"
- },
- "outputs": [],
- "source": [
- "config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n",
- "checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nWlQFuTgudxu",
- "outputId": "5e45f4f6-5bcf-4d04-bb9c-0428ee84a576"
- },
- "outputs": [],
- "source": [
- "# build the model from a config file and a checkpoint file\n",
- "model = init_model(config_file, checkpoint_file, device='cuda:0')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "izFv6pSRujk9"
- },
- "outputs": [],
- "source": [
- "# test a single image\n",
- "img = 'demo/demo.png'\n",
- "result = inference_model(model, img)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 504
- },
- "id": "bDcs9udgunQK",
- "outputId": "7c55f713-4085-47fd-fa06-720a321d0795"
- },
- "outputs": [],
- "source": [
- "# show the results\n",
- "show_result_pyplot(model, img, result, get_palette('cityscapes'))"
- ]
- },
{
"cell_type": "markdown",
"metadata": {
"id": "Ta51clKX4cwM"
},
"source": [
- "## Train a semantic segmentation model on a new dataset\n",
+ "## Finetune a semantic segmentation model on a new dataset\n",
"\n",
- "To train on a customized dataset, the following steps are necessary. \n",
+ "To finetune on a customized dataset, the following steps are necessary. \n",
"1. Add a new dataset class. \n",
"2. Create a config file accordingly. \n",
"3. Perform training and evaluation. "
@@ -268,8 +177,10 @@
"source": [
"# Let's take a look at the dataset\n",
"import mmcv\n",
+ "import mmengine\n",
"import matplotlib.pyplot as plt\n",
"\n",
+ "\n",
"img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"plt.figure(figsize=(8, 6))\n",
"plt.imshow(mmcv.bgr2rgb(img))\n",
@@ -293,18 +204,30 @@
},
"outputs": [],
"source": [
- "import os.path as osp\n",
- "import numpy as np\n",
- "from PIL import Image\n",
- "# convert dataset annotation to semantic segmentation map\n",
+ "# define dataset root and directory for images and annotations\n",
"data_root = 'iccv09Data'\n",
"img_dir = 'images'\n",
"ann_dir = 'labels'\n",
- "# define class and plaette for better visualization\n",
+ "# define class and palette for better visualization\n",
"classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n",
"palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n",
- " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]\n",
- "for file in mmcv.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
+ " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "WnGZfribFHCx"
+ },
+ "outputs": [],
+ "source": [
+ "import os.path as osp\n",
+ "import numpy as np\n",
+ "from PIL import Image\n",
+ "\n",
+ "# convert dataset annotation to semantic segmentation map\n",
+ "for file in mmengine.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n",
" seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n",
" seg_img = Image.fromarray(seg_map).convert('P')\n",
" seg_img.putpalette(np.array(palette, dtype=np.uint8))\n",
@@ -351,8 +274,8 @@
"source": [
"# split train/val set randomly\n",
"split_dir = 'splits'\n",
- "mmcv.mkdir_or_exist(osp.join(data_root, split_dir))\n",
- "filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(\n",
+ "mmengine.mkdir_or_exist(osp.join(data_root, split_dir))\n",
+ "filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir(\n",
" osp.join(data_root, ann_dir), suffix='.png')]\n",
"with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n",
" # select first 4/5 as train set\n",
@@ -380,18 +303,15 @@
},
"outputs": [],
"source": [
- "from mmseg.datasets.builder import DATASETS\n",
- "from mmseg.datasets.custom import BaseSegDataset\n",
+ "from mmseg.registry import DATASETS\n",
+ "from mmseg.datasets import BaseSegDataset\n",
+ "\n",
"\n",
"@DATASETS.register_module()\n",
"class StanfordBackgroundDataset(BaseSegDataset):\n",
- " CLASSES = classes\n",
- " PALETTE = palette\n",
- " def __init__(self, split, **kwargs):\n",
- " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', \n",
- " split=split, **kwargs)\n",
- " assert osp.exists(self.img_dir) and self.split is not None\n",
- "\n",
+ " METAINFO = dict(classes=classes, palette=palette)\n",
+ " def __init__(self, **kwargs):\n",
+ " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', **kwargs)\n",
" "
]
},
@@ -405,6 +325,16 @@
"In the next step, we need to modify the config for the training. To accelerate the process, we finetune the model from trained weights."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Download config and checkpoint files\n",
+ "!mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest ."
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -413,8 +343,9 @@
},
"outputs": [],
"source": [
- "from mmcv import Config\n",
- "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')"
+ "from mmengine import Config\n",
+ "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py')\n",
+ "print(f'Config:\\n{cfg.pretty_text}')"
]
},
{
@@ -438,10 +369,10 @@
},
"outputs": [],
"source": [
- "from mmseg.apis import set_random_seed\n",
- "\n",
"# Since we use only one GPU, BN is used instead of SyncBN\n",
"cfg.norm_cfg = dict(type='BN', requires_grad=True)\n",
+ "cfg.crop_size = (256, 256)\n",
+ "cfg.model.data_preprocessor.size = cfg.crop_size\n",
"cfg.model.backbone.norm_cfg = cfg.norm_cfg\n",
"cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n",
"cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n",
@@ -453,79 +384,55 @@
"cfg.dataset_type = 'StanfordBackgroundDataset'\n",
"cfg.data_root = data_root\n",
"\n",
- "cfg.data.samples_per_gpu = 8\n",
- "cfg.data.workers_per_gpu=8\n",
+ "cfg.train_dataloader.batch_size = 8\n",
"\n",
- "cfg.img_norm_cfg = dict(\n",
- " mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n",
- "cfg.crop_size = (256, 256)\n",
"cfg.train_pipeline = [\n",
" dict(type='LoadImageFromFile'),\n",
" dict(type='LoadAnnotations'),\n",
- " dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),\n",
+ " dict(type='RandomResize', scale=(320, 240), ratio_range=(0.5, 2.0), keep_ratio=True),\n",
" dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n",
- " dict(type='RandomFlip', flip_ratio=0.5),\n",
- " dict(type='PhotoMetricDistortion'),\n",
- " dict(type='Normalize', **cfg.img_norm_cfg),\n",
- " dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),\n",
- " dict(type='DefaultFormatBundle'),\n",
- " dict(type='Collect', keys=['img', 'gt_semantic_seg']),\n",
+ " dict(type='RandomFlip', prob=0.5),\n",
+ " dict(type='PackSegInputs')\n",
"]\n",
"\n",
"cfg.test_pipeline = [\n",
" dict(type='LoadImageFromFile'),\n",
- " dict(\n",
- " type='MultiScaleFlipAug',\n",
- " img_scale=(320, 240),\n",
- " # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],\n",
- " flip=False,\n",
- " transforms=[\n",
- " dict(type='Resize', keep_ratio=True),\n",
- " dict(type='RandomFlip'),\n",
- " dict(type='Normalize', **cfg.img_norm_cfg),\n",
- " dict(type='ImageToTensor', keys=['img']),\n",
- " dict(type='Collect', keys=['img']),\n",
- " ])\n",
+ " dict(type='Resize', scale=(320, 240), keep_ratio=True),\n",
+ " # add loading annotation after ``Resize`` because ground truth\n",
+ " # does not need to be resized\n",
+ " dict(type='LoadAnnotations'),\n",
+ " dict(type='PackSegInputs')\n",
"]\n",
"\n",
"\n",
- "cfg.data.train.type = cfg.dataset_type\n",
- "cfg.data.train.data_root = cfg.data_root\n",
- "cfg.data.train.img_dir = img_dir\n",
- "cfg.data.train.ann_dir = ann_dir\n",
- "cfg.data.train.pipeline = cfg.train_pipeline\n",
- "cfg.data.train.split = 'splits/train.txt'\n",
+ "cfg.train_dataloader.dataset.type = cfg.dataset_type\n",
+ "cfg.train_dataloader.dataset.data_root = cfg.data_root\n",
+ "cfg.train_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
+ "cfg.train_dataloader.dataset.pipeline = cfg.train_pipeline\n",
+ "cfg.train_dataloader.dataset.ann_file = 'splits/train.txt'\n",
+ "\n",
+ "cfg.val_dataloader.dataset.type = cfg.dataset_type\n",
+ "cfg.val_dataloader.dataset.data_root = cfg.data_root\n",
+ "cfg.val_dataloader.dataset.data_prefix = dict(img_path=img_dir, seg_map_path=ann_dir)\n",
+ "cfg.val_dataloader.dataset.pipeline = cfg.test_pipeline\n",
+ "cfg.val_dataloader.dataset.ann_file = 'splits/val.txt'\n",
"\n",
- "cfg.data.val.type = cfg.dataset_type\n",
- "cfg.data.val.data_root = cfg.data_root\n",
- "cfg.data.val.img_dir = img_dir\n",
- "cfg.data.val.ann_dir = ann_dir\n",
- "cfg.data.val.pipeline = cfg.test_pipeline\n",
- "cfg.data.val.split = 'splits/val.txt'\n",
+ "cfg.test_dataloader = cfg.val_dataloader\n",
"\n",
- "cfg.data.test.type = cfg.dataset_type\n",
- "cfg.data.test.data_root = cfg.data_root\n",
- "cfg.data.test.img_dir = img_dir\n",
- "cfg.data.test.ann_dir = ann_dir\n",
- "cfg.data.test.pipeline = cfg.test_pipeline\n",
- "cfg.data.test.split = 'splits/val.txt'\n",
"\n",
- "# We can still use the pre-trained Mask RCNN model though we do not need to\n",
- "# use the mask branch\n",
- "cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
+ "# Load the pretrained weights\n",
+ "cfg.load_from = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n",
"\n",
"# Set up working dir to save files and logs.\n",
"cfg.work_dir = './work_dirs/tutorial'\n",
"\n",
- "cfg.runner.max_iters = 200\n",
- "cfg.log_config.interval = 10\n",
- "cfg.evaluation.interval = 200\n",
- "cfg.checkpoint_config.interval = 200\n",
+ "cfg.train_cfg.max_iters = 200\n",
+ "cfg.train_cfg.val_interval = 200\n",
+ "cfg.default_hooks.logger.interval = 10\n",
+ "cfg.default_hooks.checkpoint.interval = 200\n",
"\n",
- "# Set seed to facitate reproducing the result\n",
- "cfg.seed = 0\n",
- "set_random_seed(0, deterministic=False)\n",
- "cfg.gpu_ids = range(1)\n",
+ "# Set seed to facilitate reproducing the result\n",
+ "cfg['randomness'] = dict(seed=0)\n",
"\n",
"# Let's have a look at the final config used for training\n",
"print(f'Config:\\n{cfg.pretty_text}')"
@@ -552,23 +459,23 @@
},
"outputs": [],
"source": [
- "from mmseg.datasets import build_dataset\n",
- "from mmseg.models import build_segmentor\n",
- "from mmseg.apis import train_segmentor\n",
- "\n",
- "\n",
- "# Build the dataset\n",
- "datasets = [build_dataset(cfg.data.train)]\n",
+ "from mmengine.runner import Runner\n",
+ "from mmseg.utils import register_all_modules\n",
"\n",
- "# Build the detector\n",
- "model = build_segmentor(cfg.model)\n",
- "# Add an attribute for visualization convenience\n",
- "model.CLASSES = datasets[0].CLASSES\n",
- "\n",
- "# Create work_dir\n",
- "mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
- "train_segmentor(model, datasets, cfg, distributed=False, validate=True, \n",
- " meta=dict())"
+ "# register all modules in mmseg into the registries\n",
+ "# do not init the default scope here because it will be initialized in the runner\n",
+ "register_all_modules(init_default_scope=False)\n",
+ "runner = Runner.from_cfg(cfg)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start training\n",
+ "runner.train()"
]
},
{
@@ -593,20 +500,17 @@
},
"outputs": [],
"source": [
- "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
+ "from mmseg.apis import inference_model, show_result_pyplot\n",
"\n",
- "model.cfg = cfg\n",
+ "model = runner.model\n",
+ "model.cfg = cfg\n",
+ "\n",
+ "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n",
"result = inference_model(model, img)\n",
"plt.figure(figsize=(8, 6))\n",
- "show_result_pyplot(model, img, result, palette)"
+ "vis_result = show_result_pyplot(model, img, result, palette)\n",
+ "plt.imshow(mmcv.bgr2rgb(vis_result))"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -618,7 +522,7 @@
"provenance": []
},
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3.7.13 ('pt1.12')",
"language": "python",
"name": "python3"
},
@@ -632,7 +536,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.0"
+ "version": "3.7.13"
},
"pycharm": {
"stem_cell": {
@@ -642,6 +546,11 @@
},
"source": []
}
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "ffdb7915c29738c259ec7ee5d0d1b9253c264f1fd267d45dd77f1a420396c120"
+ }
}
},
"nbformat": 4,
diff --git a/demo/inference_demo.ipynb b/demo/inference_demo.ipynb
index b557e9b2af..f05a947483 100644
--- a/demo/inference_demo.ipynb
+++ b/demo/inference_demo.ipynb
@@ -21,6 +21,8 @@
"outputs": [],
"source": [
"import torch\n",
+ "import mmcv\n",
+ "import matplotlib.pyplot as plt\n",
"from mmengine.model.utils import revert_sync_batchnorm\n",
"from mmseg.apis import init_model, inference_model, show_result_pyplot\n",
"from mmseg.utils import register_all_modules\n",
@@ -71,7 +73,8 @@
"outputs": [],
"source": [
"# show the results\n",
- "show_result_pyplot(model, img, result)"
+ "vis_result = show_result_pyplot(model, img, result)\n",
+ "plt.imshow(mmcv.bgr2rgb(vis_result))"
]
},
{
diff --git a/mmseg/apis/inference.py b/mmseg/apis/inference.py
index 8d718c4724..552ea7a832 100644
--- a/mmseg/apis/inference.py
+++ b/mmseg/apis/inference.py
@@ -102,7 +102,7 @@ def _preprare_data(imgs: ImageType, model: BaseSegmentor):
is_batch = False
if isinstance(imgs[0], np.ndarray):
- cfg.test_pipeline[0].type = 'LoadImageFromNDArray'
+ cfg.test_pipeline[0]['type'] = 'LoadImageFromNDArray'
# TODO: Consider using the singleton pattern to avoid building
# a pipeline for each inference