Commit

Temp code.
sennnnn committed Jul 22, 2021
1 parent f3aaecc commit 9dfe1c0
Showing 6 changed files with 393 additions and 221 deletions.
214 changes: 147 additions & 67 deletions mmseg/apis/test.py
@@ -1,14 +1,17 @@
import os.path as osp
import pickle
import shutil
import tempfile

import mmcv
import numpy as np
import torch
import torch.distributed as dist
from mmcv.engine import collect_results_cpu, collect_results_gpu
from mmcv.image import tensor2imgs
from mmcv.runner import get_dist_info

from mmseg.core.evaluation.metrics import ResultProcessor


def np2tmp(array, temp_file_name=None, tmpdir=None):
@@ -169,24 +172,39 @@ def multi_gpu_test(model,

def progressive_single_gpu_test(model,
data_loader,
efficient_test,
show=False,
out_dir=None,
opacity=0.5):
model.eval()
dataset = data_loader.dataset
num_classes = len(dataset.CLASSES)
prog_bar = mmcv.ProgressBar(len(dataset))

if efficient_test:
collector = ResultProcessor(
num_classes=len(dataset.CLASSES),
ignore_index=dataset.ignore_index,
collect_type='pixels_count',
label_map=dataset.label_map,
reduce_zero_label=dataset.reduce_zero_label)
else:
collector = ResultProcessor(
num_classes=len(dataset.CLASSES),
ignore_index=dataset.ignore_index,
collect_type='seg_map',
label_map=dataset.label_map,
reduce_zero_label=dataset.reduce_zero_label)

gt_maps_generator = dataset.get_gt_seg_maps()

cur = 0
for _, data in enumerate(data_loader):
with torch.no_grad():
result = model(return_loss=False, **data)

gt_map = next(gt_maps_generator)
meta = data['img_metas'][0].data
collector.collect(result, gt_map, meta)

if show or out_dir:
img_tensor = data['img'][0]
img_metas = data['img_metas'][0].data[0]
@@ -213,101 +231,163 @@ def progressive_single_gpu_test(model,
out_file=out_file,
opacity=opacity)


batch_size = len(result)
for _ in range(batch_size):
prog_bar.update()

cur += len(result)

return collector
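
Note: ResultProcessor itself is not part of this diff, but the call sites above pin down the interface it must expose: the constructor keyword arguments, a collect(results, gt_map, meta) method, and a merge(sub_collectors) method used by the collection helpers further down. The following is a minimal sketch of such a class, assuming 'pixels_count' accumulates intersect_and_union totals while 'seg_map' stores raw predictions; the class name and all internals here are guesses, not the actual implementation.

import torch

from mmseg.core.evaluation.metrics import intersect_and_union


class ResultProcessorSketch:
    """Hypothetical reconstruction of the interface used above."""

    def __init__(self, num_classes, ignore_index, collect_type='pixels_count',
                 label_map=None, reduce_zero_label=False):
        self.num_classes = num_classes
        self.ignore_index = ignore_index
        self.collect_type = collect_type
        self.label_map = label_map if label_map is not None else dict()
        self.reduce_zero_label = reduce_zero_label
        # Rows: area_intersect, area_union, area_pred_label, area_label.
        self.totals = torch.zeros((4, num_classes), dtype=torch.float64)
        self.seg_maps = []

    def collect(self, results, gt_map, meta):
        # The loops above pass one gt map per iteration, which implies
        # samples_per_gpu=1, so results holds a single prediction.
        for result in results:
            if self.collect_type == 'pixels_count':
                self.totals += torch.stack(
                    intersect_and_union(result, gt_map, self.num_classes,
                                        self.ignore_index, self.label_map,
                                        self.reduce_zero_label))
            else:  # 'seg_map'
                self.seg_maps.append((result, gt_map))

    def merge(self, sub_collectors):
        # Fold the per-rank collectors gathered by the helpers below
        # into this one.
        for sub in sub_collectors:
            self.totals += sub.totals
            self.seg_maps.extend(sub.seg_maps)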


# TODO: Support distributed test api
def progressive_multi_gpu_test(model,
data_loader,
efficient_test,
tmpdir=None,
gpu_collect=False):

model.eval()
dataset = data_loader.dataset
num_classes = len(dataset.CLASSES)
if efficient_test:
collector = ResultProcessor(
num_classes=len(dataset.CLASSES),
ignore_index=dataset.ignore_index,
collect_type='pixels_count',
label_map=dataset.label_map,
reduce_zero_label=dataset.reduce_zero_label)
else:
collector = ResultProcessor(
num_classes=len(dataset.CLASSES),
ignore_index=dataset.ignore_index,
collect_type='seg_map',
label_map=dataset.label_map,
reduce_zero_label=dataset.reduce_zero_label)

rank, world_size = get_dist_info()
if rank == 0:
prog_bar = mmcv.ProgressBar(len(dataset))


cur = 0
for _, data in enumerate(data_loader):
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)

gt_seg_map = dataset.index_gt_seg_maps(cur + rank)
meta = data['img_metas'][0].data
collector.collect(result, gt_seg_map, meta)


if rank == 0:
for _ in range(len(result) * world_size):
prog_bar.update()

cur += len(result) * world_size

# collect results from all ranks
if gpu_collect:
collector = collect_collector_gpu(collector)
else:
collector = collect_collector_cpu(collector, tmpdir)
return collector
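
For context, a hedged sketch of how this entry point might be driven from a test script; the wrapper setup below mirrors the usual mmseg tools/test.py flow and is an assumption, not part of this commit.

import torch
from mmcv.parallel import MMDistributedDataParallel
from mmcv.runner import get_dist_info

# Hypothetical driver; model and data_loader are assumed to be built the
# way tools/test.py builds them.
model = MMDistributedDataParallel(
    model.cuda(),
    device_ids=[torch.cuda.current_device()],
    broadcast_buffers=False)
collector = progressive_multi_gpu_test(
    model, data_loader, efficient_test=True, gpu_collect=False)
rank, _ = get_dist_info()
if rank == 0:
    # Only rank 0 receives the merged collector; other ranks get None.
    # Presumably calculate_metrics() consumes it downstream.
    assert collector is not None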


def collect_collector_gpu(collector):
"""Collect result collectors under gpu mode.
In gpu mode, this function encodes the collector to a gpu tensor and uses
gpu communication to collect it from all ranks.
Args:
collector (object): Result collector containing predictions and labels
to be collected.
Returns:
object: The gathered collector.
"""
rank, world_size = get_dist_info()
# dump result part to tensor with pickle
part_tensor = torch.tensor(
bytearray(pickle.dumps(collector)), dtype=torch.uint8, device='cuda')
# gather all result part tensor shape
shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
shape_list = [shape_tensor.clone() for _ in range(world_size)]
dist.all_gather(shape_list, shape_tensor)
# padding result part tensor to max length
shape_max = torch.tensor(shape_list).max()
part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
part_send[:shape_tensor[0]] = part_tensor
part_recv_list = [
part_tensor.new_zeros(shape_max) for _ in range(world_size)
]
# gather all result part
dist.all_gather(part_recv_list, part_send)

if rank == 0:
# load collectors of all ranks from the gathered tensors
main_collector = pickle.loads(
part_recv_list[0][:shape_list[0]].cpu().numpy().tobytes())
sub_collectors = []
# skip rank 0, whose collector is already loaded as main_collector
for recv, shape in zip(part_recv_list[1:], shape_list[1:]):
part_collector = pickle.loads(
recv[:shape[0]].cpu().numpy().tobytes())
# When data is severely insufficient, an empty part collector
# on a certain gpu could make the overall outputs empty.
if part_collector:
sub_collectors.append(part_collector)
main_collector.merge(sub_collectors)
return main_collector
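
The function above uses a standard trick: dist.all_gather only moves tensors of identical shape, so each collector is pickled into a uint8 tensor and padded to the maximum length across ranks. A single-process sketch of that round trip (no distributed setup required to see the idea):

import pickle

import torch

obj = {'totals': torch.ones(4, dtype=torch.float64)}
buf = torch.tensor(bytearray(pickle.dumps(obj)), dtype=torch.uint8)
# Pad to a common length; in the real code this is the max of the
# gathered shapes across ranks.
padded = torch.zeros(buf.numel() + 100, dtype=torch.uint8)
padded[:buf.numel()] = buf
# Receivers slice off the padding before unpickling.
restored = pickle.loads(padded[:buf.numel()].cpu().numpy().tobytes())
assert torch.equal(restored['totals'], obj['totals'])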


def collect_collector_cpu(collector, tmpdir=None):
"""Collect result collectors under cpu mode.
In cpu mode, this function saves the result collectors from different
gpus to ``tmpdir`` and collects them on the rank 0 worker.
Args:
collector (object): Result collector containing predictions and labels
to be collected.
tmpdir (str | None): Temporary directory for the collected results to
be stored in. If set to None, a random temporary directory will be
created for them.
Returns:
object: The gathered collector.
"""
rank, world_size = get_dist_info()
# create a tmp dir if it is not specified
if tmpdir is None:
MAX_LEN = 512
# 32 is the ASCII code of the space character, used as padding
dir_tensor = torch.full((MAX_LEN, ),
32,
dtype=torch.uint8,
device='cuda')
if rank == 0:
mmcv.mkdir_or_exist('.dist_test')
tmpdir = tempfile.mkdtemp(dir='.dist_test')
tmpdir = torch.tensor(
bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
dir_tensor[:len(tmpdir)] = tmpdir
dist.broadcast(dir_tensor, 0)
tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
else:
mmcv.mkdir_or_exist(tmpdir)
# dump the part result to the dir
mmcv.dump(collector, osp.join(tmpdir, f'part_{rank}.pkl'))
dist.barrier()
# collect all parts
if rank != 0:
return None
else:
# load results of all parts from tmp dir
main_collector = mmcv.load(osp.join(tmpdir, f'part_{0}.pkl'))
sub_collectors = []
for i in range(1, world_size):
part_file = osp.join(tmpdir, f'part_{i}.pkl')
part_collector = mmcv.load(part_file)
# When data is severely insufficient, an empty part collector
# on a certain gpu could make the overall outputs empty.
if part_collector:
sub_collectors.append(part_collector)
main_collector.merge(sub_collectors)
# remove tmp dir
shutil.rmtree(tmpdir)
return main_collector
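
The tmpdir handshake above relies on the same encoding idea in miniature: rank 0 creates the directory, packs its name into a fixed-width uint8 tensor padded with spaces (ASCII 32), broadcasts it, and every rank strips the padding back off. A minimal sketch of the encode/decode pair, leaving out the actual broadcast:

import torch

MAX_LEN = 512
name = '.dist_test/tmpabc123'  # hypothetical directory name
dir_tensor = torch.full((MAX_LEN, ), 32, dtype=torch.uint8)  # 32 == space
dir_tensor[:len(name)] = torch.tensor(
    bytearray(name.encode()), dtype=torch.uint8)
# After dist.broadcast(dir_tensor, 0), every rank decodes it identically:
decoded = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
assert decoded == name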
7 changes: 4 additions & 3 deletions mmseg/core/evaluation/__init__.py
@@ -1,9 +1,10 @@
from .class_names import get_classes, get_palette
from .eval_hooks import DistEvalHook, EvalHook
from .metrics import (ResultProcessor, calculate_metrics, eval_metrics,
mean_dice, mean_fscore, mean_iou)

__all__ = [
'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore',
'eval_metrics', 'get_classes', 'get_palette', 'calculate_metrics',
'ResultProcessor'
]
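
With the re-export in place, the processor is importable from the package root. A small usage sketch; the constructor values below are illustrative placeholders (Cityscapes-like numbers), not taken from this commit:

from mmseg.core.evaluation import ResultProcessor

processor = ResultProcessor(
    num_classes=19,  # e.g. Cityscapes
    ignore_index=255,
    collect_type='pixels_count',  # or 'seg_map'
    label_map=dict(),
    reduce_zero_label=False)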