diff --git a/contrib/EISeg/.gitignore b/EISeg/.gitignore similarity index 97% rename from contrib/EISeg/.gitignore rename to EISeg/.gitignore index b076cc0d76..dd36be3ae9 100644 --- a/contrib/EISeg/.gitignore +++ b/EISeg/.gitignore @@ -1,4 +1,5 @@ # Byte-compiled / optimized / DLL files +test/ __pycache__/ *.py[cod] *$py.class @@ -9,6 +10,9 @@ vis_temp.py test.txt *.pdparams output/ +temp* +temp/ + # C extensions *.so @@ -143,8 +147,12 @@ out/ eiseg/requirements_with_opt.txt # test +test/ test_output/ *.npy # qsetting -*.ini \ No newline at end of file +*.ini + +# mask_sm +tool/mask.png diff --git a/legacy/deploy/cpp/LICENSE b/EISeg/LICENSE similarity index 100% rename from legacy/deploy/cpp/LICENSE rename to EISeg/LICENSE diff --git a/contrib/EISeg/MANIFEST.in b/EISeg/MANIFEST.in similarity index 61% rename from contrib/EISeg/MANIFEST.in rename to EISeg/MANIFEST.in index 5ed39b69f4..709b300985 100644 --- a/contrib/EISeg/MANIFEST.in +++ b/EISeg/MANIFEST.in @@ -1,2 +1,3 @@ include eiseg/config/* include eiseg/resource/* +include eiseg/util/translate/* \ No newline at end of file diff --git a/EISeg/README.md b/EISeg/README.md new file mode 100644 index 0000000000..b07d234beb --- /dev/null +++ b/EISeg/README.md @@ -0,0 +1,149 @@ +# EISeg + +[![Python 3.6](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) + + +## 最新动向 + +- 支持多边形编辑,上线更多功能,最新EISeg 0.3.0推出。 + +## 介绍 + +EISeg(Efficient Interactive Segmentation)是基于飞桨开发的一个高效智能的交互式分割标注软件。涵盖了高精度和轻量级等不同方向的高质量交互式分割模型,方便开发者快速实现语义及实例标签的标注,降低标注成本。 另外,将EISeg获取到的标注应用到PaddleSeg提供的其他分割模型进行训练,便可得到定制化场景的高精度模型,打通分割任务从数据标注到模型训练及预测的全流程。 + +![eiseg_demo](../../docs/images/eiseg_demo.gif) + +## 模型准备 + +在使用EIseg前,请先下载模型参数。EISeg开放了在COCO+LVIS和大规模人像数据上训练的四个标注模型,满足通用场景和人像场景的标注需求。其中模型结构对应EISeg交互工具中的网络选择模块,用户需要根据自己的场景需求选择不同的网络结构和加载参数。 + +| 模型类型 | 适用场景 | 模型结构 | 下载地址| +| --- | --- | --- | ---| +| 高精度模型 | 适用于通用场景的图像标注。 |HRNet18_OCR64 | [hrnet18_ocr64_cocolvis](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18_ocr64_cocolvis.pdparams) | +| 轻量化模型 | 适用于通用场景的图像标注。 |HRNet18s_OCR48 | [hrnet18s_ocr48_cocolvis](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18s_ocr48_cocolvis.pdparams) | +| 高精度模型 | 适用于人像标注场景。 |HRNet18_OCR64 | [hrnet18_ocr64_human](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18_ocr64_human.pdparams) | +| 轻量化模型 | 适用于人像标注场景。 |HRNet18s_OCR48 | [hrnet18s_ocr48_human](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18s_ocr48_human.pdparams) | + + + +## 安装使用 + +EISeg提供多种安装方式,其中使用[pip](#PIP)和[运行代码](#运行代码)方式可兼容Windows,Mac OS和Linux。为了避免环境冲突,推荐在conda创建的虚拟环境中安装。 + +版本要求: + +* PaddlePaddle >= 2.1.0 + +PaddlePaddle安装请参考[官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html)。 + +### 克隆到本地 + +通过git将PaddleSeg克隆到本地: + +```shell +git clone https://github.com/PaddlePaddle/PaddleSeg.git +``` + +安装好所需环境后,进入EISeg,可通过直接运行eiseg打开EISeg: + +```shell +cd PaddleSeg\contrib\EISeg +python -m eiseg +``` + +或进入eiseg,运行exe.py打开EISeg: + +```shell +cd PaddleSeg\contrib\EISeg\eiseg +python exe.py +``` + +### PIP + +pip安装方式如下: + +```shell +pip install eiseg +``` +pip会自动安装依赖。安装完成后命令行输入: +```shell +eiseg +``` +即可运行软件。 + +### Windows exe + 
+EISeg使用[QPT](https://github.com/GT-ZhangAcer/QPT)进行打包。可以从[这里](http://cloud.a-boat.cn:2021/share/egI4T3tB)或[百度云盘](https://pan.baidu.com/s/19kUUQBJamL0xY5UKzlNBzg)(提取码:82z9)下载最新EISeg。解压后双击启动程序.exe即可运行程序。程序第一次运行会初始化安装所需要的包,请稍等片刻。 + +## 使用 + +打开软件后,在对项目进行标注前,需要进行如下设置: + +1. **模型参数加载** + + 选择合适的网络,并加载对应的模型参数。目前在EISeg中,网络分为`HRNet18s_OCR48`和`HRNet18_OCR64`,并分别提供了人像和通用两种模型参数。在正确加载模型参数后,右下角状态栏会给予说明。若网络参数与模型参数不符,将会弹出警告,此时加载失败需重新加载。正确加载的模型参数会记录在`近期模型参数`中,可以方便切换,并且下次打开软件时自动加载退出时的模型参数。 + +2. **图像加载** + + 打开图像/图像文件夹。当看到主界面图像正确加载,`数据列表`正确出现图像路径即可。 + +3. **标签添加/加载** + + 添加/加载标签。可以通过`添加标签`新建标签,标签分为4列,分别对应像素值、说明、颜色和删除。新建好的标签可以通过`保存标签列表`保存为txt文件,其他合作者可以通过`加载标签列表`将标签导入。通过加载方式导入的标签,重启软件后会自动加载。 + +4. **自动保存设置** + + 在使用中可以将`自动保存`设置上,设定好文件夹即可,这样在使用时切换图像会自动将完成标注的图像进行保存。 + +当设置完成后即可开始进行标注,默认情况下常用的按键/快捷键如下,如需修改可按`E`弹出快捷键修改。 + +| 部分按键/快捷键 | 功能 | +| --------------------- | ----------------- | +| 鼠标左键 | 增加正样本点 | +| 鼠标右键 | 增加负样本点 | +| 鼠标中键 | 平移图像 | +| Ctrl+鼠标中键(滚轮) | 缩放图像 | +| S | 切换上一张图 | +| F | 切换下一张图 | +| Space(空格) | 完成标注/切换状态 | +| Ctrl+Z | 撤销 | +| Ctrl+Shift+Z | 清除 | +| Ctrl+Y | 重做 | +| Ctrl+A | 打开图像 | +| Shift+A | 打开文件夹 | +| E | 打开快捷键表 | +| Backspace(退格) | 删除多边形 | +| 鼠标双击(点) | 删除点 | +| 鼠标双击(边) | 添加点 | + +## 新功能使用说明 + +- **多边形** + +1. 交互完成后使用Space(空格)完成交互标注,此时出现多边形边界;当需要在多边形内部继续进行交互,则使用空格切换为交互模式,此时多边形无法选中和更改。 +2. 多边形可以拖动和删除,使用鼠标左边可以对锚点进行拖动,鼠标左键双击锚点可以删除锚点,双击两点之间的边则可在此边添加一个锚点。 +3. 打开`保留最大连通块`后,所有的点击只会在图像中保留面积最大的区域,其余小区域将不会显示和保存。 + +- **格式保存** + +1. 打开保存`JSON保存`或`COCO保存`后,多边形会被记录,加载时会自动加载。 +2. 若不设置保存路径,默认保存至当前图像文件夹下的label文件夹中。 +3. 如果有图像之间名称相同但后缀名不同,可以打开`标签和图像使用相同扩展名`。 +4. 还可设置灰度保存、伪彩色保存和抠图保存,见工具栏中7-9号工具。 + +- **生成mask** + +1. 标签按住第二列可以进行拖动,最后生成mask时会根据标签列表从上往下进行覆盖。 + +- **界面模块** + +1. 可在`显示`中选择需要显示的界面模块,正常退出时将会记录界面模块的状态和位置,下次打开自动加载。 + +## 版本更新 + +- 待发版 **0.3.0**:【1】初步完成多边形编辑功能,支持对交互标注的结果进行编辑;【2】支持中/英界面;【3】支持保存为灰度/伪彩色标签和COCO格式;【4】界面拖动更加灵活;【5】标签栏可拖动,生成mask的覆盖顺序由上往下覆盖。 +- 2021.07.07 **0.2.0**:新增contrib:EISeg,可实现人像和通用图像的快速交互式标注。 + +## 开发者 + +[Yuying Hao](https://github.com/haoyuying), [Lin Han](https://github.com/linhandev/), [Yizhou Chen](https://github.com/geoyee), [Yiakwy](https://github.com/yiakwy), [GT](https://github.com/GT-ZhangAcer), [Zhiliang Yu](https://github.com/yzl19940819) diff --git a/EISeg/eiseg/__init__.py b/EISeg/eiseg/__init__.py new file mode 100644 index 0000000000..11f083a93d --- /dev/null +++ b/EISeg/eiseg/__init__.py @@ -0,0 +1,16 @@ +import sys +import os.path as osp + +pjpath = osp.dirname(osp.realpath(__file__)) +sys.path.append(pjpath) + +__APPNAME__ = "EISeg" +__VERSION__ = "0.3.0" + + +import os +import cv2 + +for k, v in os.environ.items(): + if k.startswith("QT_") and "cv2" in v: + del os.environ[k] diff --git a/contrib/EISeg/eiseg/exe.py b/EISeg/eiseg/__main__.py similarity index 100% rename from contrib/EISeg/eiseg/exe.py rename to EISeg/eiseg/__main__.py diff --git a/EISeg/eiseg/app.py b/EISeg/eiseg/app.py new file mode 100644 index 0000000000..a5066ee8ca --- /dev/null +++ b/EISeg/eiseg/app.py @@ -0,0 +1,1563 @@ +import os +import os.path as osp +from functools import partial +import sys +import json +from distutils.util import strtobool + +from qtpy import QtGui, QtCore, QtWidgets +from qtpy.QtWidgets import QMainWindow, QMessageBox, QTableWidgetItem +from qtpy.QtGui import QImage, QPixmap +from qtpy.QtCore import Qt, QByteArray, QVariant +import cv2 +import numpy as np + +from eiseg import pjpath, __APPNAME__ +from models import ModelsNick +from widget import ShortcutWindow, PolygonAnnotation +from controller import InteractiveController +from ui import Ui_EISeg +import util +from util 
import MODELS, COCO + + +class APP_EISeg(QMainWindow, Ui_EISeg): + IDILE, ANNING, EDITING = 0, 1, 2 + # IDILE:网络,权重,图像三者任一没有加载 + # EDITING:多边形编辑,可以交互式,但是多边形内部不能点 + # ANNING:交互式标注,只能交互式,不能编辑多边形,多边形不接hover + + def __init__(self, parent=None): + super(APP_EISeg, self).__init__(parent) + + self.settings = QtCore.QSettings( + osp.join(pjpath, "config/setting.ini"), QtCore.QSettings.IniFormat + ) + + # 初始化界面 + self.setupUi(self) + + # app变量 + self.anning = False + self.save_status = { + "gray_scale": True, + "pseudo_color": True, + "json": False, + "coco": True, + "foreground": True, + } # 是否保存这几个格式 + + self.image = None # 可能先加载图片后加载模型,只用于暂存图片 + self.controller = InteractiveController( + # self.updateImage, + predictor_params={ + "brs_mode": "NoBRS", + "zoom_in_params": { + "skip_clicks": -1, + "target_size": (400, 400), + "expansion_ratio": 1.4, + }, + "predictor_params": {"net_clicks_limit": None, "max_size": 800}, + }, + prob_thresh=self.segThresh, + ) + self.controller.setModel(MODELS[0].__name__) + # self.controller.labelList = util.LabelList() # 标签列表 + self.outputDir = None # 标签保存路径 + self.labelPaths = [] # 所有outputdir中的标签文件路径 + self.imagePaths = [] # 文件夹下所有待标注图片路径 + self.currIdx = 0 # 文件夹标注当前图片下标 + self.isDirty = False # 是否需要保存 + self.origExt = False # 是否使用图片本身拓展名,防止重名覆盖 + self.coco = COCO() + self.colorMap = util.colorMap + self.mattingBackground = [0, 0, 128] + + self.rsRGB = [0, 0, 0] # 遥感RGB索引 + self.midx = 0 # 医疗切片索引 + self.rawimg = None + self.imagesGrid = [] # 图像宫格 + # worker + self.display_dockwidget = [True, True, True, True] + self.dock_widgets = [ + self.ModelDock, + self.DataDock, + self.LabelDock, + self.ShowSetDock + ] + self.config = util.parse_configs(osp.join(pjpath, "config/config.yaml")) + self.recentModels = self.settings.value( + "recent_models", QVariant([]), type=list + ) + self.recentFiles = self.settings.value("recent_files", QVariant([]), type=list) + self.dockStatus = self.settings.value("dock_status", QVariant([]), type=list) + self.saveStatus = self.settings.value("save_status", QVariant([]), type=list) + self.layoutStatus = self.settings.value("layout_status", QByteArray()) + self.mattingColor = self.settings.value( + "matting_color", QVariant([]), type=list + ) + + # 初始化action + self.initActions() + + # 更新近期记录 + self.toggleDockWidgets(True) + self.updateModelsMenu() + self.updateRecentFile() + self.loadLayout() + + # 窗口 + ## 快捷键 + self.shortcutWindow = ShortcutWindow(self.actions, pjpath) + + ## 画布 + self.scene.clickRequest.connect(self.canvasClick) + self.canvas.zoomRequest.connect(self.viewZoomed) + self.annImage = QtWidgets.QGraphicsPixmapItem() + self.scene.addItem(self.annImage) + + ## 按钮点击 + self.btnSave.clicked.connect(self.saveLabel) # 保存 + self.listFiles.itemDoubleClicked.connect(self.imageListClicked) # 标签列表点击 + self.comboModelSelect.currentIndexChanged.connect(self.changeModel) # 模型选择 + self.btnAddClass.clicked.connect(self.addLabel) + self.btnParamsSelect.clicked.connect(self.changeParam) # 模型参数选择 + + ## 滑动 + self.sldOpacity.valueChanged.connect(self.maskOpacityChanged) + self.sldClickRadius.valueChanged.connect(self.clickRadiusChanged) + self.sldThresh.valueChanged.connect(self.threshChanged) + + ## 标签列表点击 + self.labelListTable.cellDoubleClicked.connect(self.labelListDoubleClick) + self.labelListTable.cellClicked.connect(self.labelListClicked) + self.labelListTable.cellChanged.connect(self.labelListItemChanged) + + ## 功能区选择 + # self.rsShow.currentIndexChanged.connect(self.rsShowModeChange) # 显示模型 + + def initActions(self): + tr = 
partial(QtCore.QCoreApplication.translate, "APP_EISeg") + action = partial(util.newAction, self) + self.actions = util.struct() + start = dir() + + # load status + if self.saveStatus != []: + for sv in self.saveStatus: + self.save_status[sv[0]] = sv[1] + + edit_shortcuts = action( + tr("&编辑快捷键"), + self.editShortcut, + "edit_shortcuts", + "Shortcut", + tr("编辑软件快捷键"), + ) + turn_prev = action( + tr("&上一张"), + partial(self.turnImg, -1), + "turn_prev", + "Prev", + tr("翻到上一张图片"), + ) + turn_next = action( + tr("&下一张"), + partial(self.turnImg, 1), + "turn_next", + "Next", + tr("翻到下一张图片"), + ) + open_image = action( + tr("&打开图像"), + self.openImage, + "open_image", + "OpenImage", + tr("打开一张图像进行标注"), + ) + open_folder = action( + tr("&打开文件夹"), + self.openFolder, + "open_folder", + "OpenFolder", + tr("打开一个文件夹下所有的图像进行标注"), + ) + change_output_dir = action( + tr("&改变标签保存路径"), + partial(self.changeOutputDir, None), + "change_output_dir", + "ChangeLabelPath", + tr("改变标签保存的文件夹路径"), + ) + load_param = action( + tr("&加载模型参数"), + self.changeParam, + "load_param", + "Model", + tr("加载一个模型参数"), + ) + finish_object = action( + tr("&完成当前目标"), + self.finishObject, + "finish_object", + "Ok", + tr("完成当前目标的标注"), + ) + clear = action( + tr("&清除所有标注"), + self.undoAll, + "clear", + "Clear", + tr("清除所有标注信息"), + ) + undo = action( + tr("&撤销"), + self.undoClick, + "undo", + "Undo", + tr("撤销一次点击"), + ) + redo = action( + tr("&重做"), + self.redoClick, + "redo", + "Redo", + tr("重做一次点击"), + ) + save = action( + tr("&保存"), + self.saveLabel, + "save", + "Save", + tr("保存图像标签"), + ) + save_as = action( + tr("&另存为"), + partial(self.saveLabel, saveAs=True), + "save_as", + "OtherSave", + tr("指定标签保存路径"), + ) + auto_save = action( + tr("&自动保存"), + self.toggleAutoSave, + "auto_save", + "AutoSave", + tr("翻页同时自动保存"), + checkable=True, + ) + # auto_save.setChecked(self.config.get("auto_save", False)) + del_active_polygon = action( + tr("&删除多边形"), + self.delActivePolygon, + "del_active_polygon", + "RemovePolygon", + tr("删除当前选中的多边形"), + ) + largest_component = action( + tr("&保留最大连通块"), + self.toggleLargestCC, + "largest_component", + "SaveMaxPolygon", + tr("保留最大的连通块"), + checkable=True, + ) + origional_extension = action( + tr("&标签和图像使用相同拓展名"), + self.toggleOrigExt, + "origional_extension", + "Same", + tr("标签和图像使用相同拓展名,用于图像中有文件名相同,拓展名不同的情况"), + checkable=True, + ) + save_pseudo = action( + tr("&伪彩色保存"), + partial(self.toggleSave, "pseudo_color"), + "save_pseudo", + "SavePseudoColor", + tr("保存为伪彩色图像"), + checkable=True, + ) + save_pseudo.setChecked(self.save_status["pseudo_color"]) + save_grayscale = action( + tr("&灰度保存"), + partial(self.toggleSave, "gray_scale"), + "save_pseudo", + "SaveGrayScale", + tr("保存为灰度图像,像素的灰度为对应类型的标签"), + checkable=True, + ) + save_grayscale.setChecked(self.save_status["gray_scale"]) + save_json = action( + tr("&JSON保存"), + partial(self.toggleSave, "json"), + "save_json", + "SaveJson", + tr("保存为JSON格式"), + checkable=True, + ) + save_json.setChecked(self.save_status["json"]) + save_coco = action( + tr("&COCO保存"), + partial(self.toggleSave, "coco"), + "save_coco", + "SaveCOCO", + tr("保存为COCO格式"), + checkable=True, + ) + save_coco.setChecked(self.save_status["coco"]) + close = action( + tr("&关闭"), + partial(self.saveImage, True), + "close", + "End", + tr("关闭当前图像"), + ) + save_matting = action( + tr("&抠图保存"), + partial(self.toggleSave, "foreground"), + "save_matting", + "SaveMatting", + tr("只保留前景,背景设置为背景色"), + checkable=True, + ) + save_matting.setChecked(self.save_status["foreground"]) + set_matting_background = 
action( + tr("&设置抠图背景色"), + self.setMattingBackground, + "set_matting_background", + self.mattingBackground, + tr("抠图后背景像素的颜色"), + ) + quit = action( + tr("&退出"), + self.close, + "quit", + "Close", + tr("退出软件"), + ) + save_label = action( + tr("&保存标签列表"), + partial(self.saveLabelList, None), + "save_label", + "ExportLabel", + tr("将标签保存成标签配置文件"), + ) + load_label = action( + tr("&加载标签列表"), + partial(self.loadLabelList, None), + "load_label", + "ImportLabel", + tr("从标签配置文件中加载标签"), + ) + clear_label = action( + tr("&清空标签列表"), + self.clearLabelList, + "clear_label", + "ClearLabel", + tr("清空所有的标签"), + ) + clear_recent = action( + tr("&清除标注记录"), + self.clearRecentFile, + "clear_recent", + "ClearRecent", + tr("清除近期标注记录"), + ) + model_worker = action( + tr("&模型选择"), + partial(self.changeWorkerShow, 0), + "model_worker", + "Net", + tr("模型选择"), + checkable=True, + ) + data_worker = action( + tr("&数据列表"), + partial(self.changeWorkerShow, 1), + "data_worker", + "Data", + tr("数据列表"), + checkable=True, + ) + label_worker = action( + tr("&标签列表"), + partial(self.changeWorkerShow, 2), + "label_worker", + "Label", + tr("标签列表"), + checkable=True, + ) + set_worker = action( + tr("&分割设置"), + partial(self.changeWorkerShow, 3), + "set_worker", + "Setting", + tr("分割设置"), + checkable=True, + ) + for name in dir(): + if name not in start: + self.actions.append(eval(name)) + recent_files = QtWidgets.QMenu(tr("近期文件")) + recent_files.setIcon(util.newIcon("Data")) + recent_files.aboutToShow.connect(self.updateRecentFile) + recent_params = QtWidgets.QMenu(tr("近期模型及参数")) + recent_params.setIcon(util.newIcon("Net")) + recent_params.aboutToShow.connect(self.updateModelsMenu) + languages = QtWidgets.QMenu(tr("语言")) + languages.setIcon(util.newIcon("Language")) + languages.aboutToShow.connect(self.updateLanguage) + + self.menus = util.struct( + recent_files=recent_files, + recent_params=recent_params, + languages=languages, + fileMenu=( + open_image, + open_folder, + change_output_dir, + load_param, + clear_recent, + recent_files, + recent_params, + None, + save, + save_as, + auto_save, + None, + turn_next, + turn_prev, + close, + None, + quit, + ), + labelMenu=( + save_label, + load_label, + clear_label, + ), + workMenu=( + largest_component, + del_active_polygon, + None, + origional_extension, + save_pseudo, + save_grayscale, + save_matting, + set_matting_background, + None, + save_json, + save_coco, + ), + showMenu=( + model_worker, + data_worker, + label_worker, + set_worker, + ), + helpMenu=(languages, edit_shortcuts), + toolBar=( + finish_object, + clear, + undo, + redo, + turn_prev, + turn_next, + None, + save_pseudo, + save_grayscale, + save_matting, + save_json, + save_coco, + origional_extension, + None, + largest_component, + ), + ) + + def menu(title, actions=None): + menu = self.menuBar().addMenu(title) + if actions: + util.addActions(menu, actions) + return menu + + menu(tr("文件"), self.menus.fileMenu) + menu(tr("标注"), self.menus.labelMenu) + menu(tr("功能"), self.menus.workMenu) + menu(tr("显示"), self.menus.showMenu) + menu(tr("帮助"), self.menus.helpMenu) + util.addActions(self.toolBar, self.menus.toolBar) + # foreground backgroud + if self.settings.value("matting_color"): + self.mattingBackground = [ + int(c) for c in self.settings.value("matting_color") + ] + self.actions.set_matting_background.setIcon( + util.newIcon(self.mattingBackground) + ) + + def setMattingBackground(self): + c = self.mattingBackground + color = QtWidgets.QColorDialog.getColor(QtGui.QColor(c[0], c[1], c[2]), self) + self.mattingBackground 
= color.getRgb()[:3] + self.settings.setValue( + "matting_color", [int(c) for c in self.mattingBackground] + ) + self.actions.set_matting_background.setIcon( + util.newIcon(self.mattingBackground) + ) + + def editShortcut(self): + self.shortcutWindow.center() + self.shortcutWindow.show() + + # 多语言 + def updateLanguage(self): + self.menus.languages.clear() + langs = os.listdir(osp.join(pjpath, "util/translate")) + langs = [n.split(".")[0] for n in langs if n.endswith("qm")] + langs.append("中文") + for lang in langs: + icon = util.newIcon(lang) + action = QtWidgets.QAction(icon, lang, self) + action.triggered.connect(partial(self.changeLanguage, lang)) + self.menus.languages.addAction(action) + + def changeLanguage(self, lang): + self.settings.setValue("language", lang) + self.warn(self.tr("切换语言"), self.tr("切换语言需要重启软件才能生效")) + + # 近期图像 + def updateRecentFile(self): + menu = self.menus.recent_files + menu.clear() + recentFiles = self.settings.value("recent_files", QVariant([]), type=list) + files = [f for f in recentFiles if osp.exists(f)] + for i, f in enumerate(files): + icon = util.newIcon("File") + action = QtWidgets.QAction( + icon, "&【%d】 %s" % (i + 1, QtCore.QFileInfo(f).fileName()), self + ) + action.triggered.connect(partial(self.openRecentImage, f)) + menu.addAction(action) + if len(files) == 0: + menu.addAction(self.tr("无近期文件")) + self.settings.setValue("recent_files", files) + + def addRecentFile(self, path): + if not osp.exists(path): + return + paths = self.settings.value("recent_files", QVariant([]), type=list) + if path not in paths: + paths.append(path) + if len(paths) > 15: + del paths[0] + self.settings.setValue("recent_files", paths) + self.updateRecentFile() + + def clearRecentFile(self): + self.settings.remove("recent_files") + self.statusbar.showMessage(self.tr("已清除最近打开文件"), 10000) + + # 模型加载 + def updateModelsMenu(self): + menu = self.menus.recent_params + menu.clear() + + self.recentModels = [ + m for m in self.recentModels if osp.exists(m["param_path"]) + ] + for idx, m in enumerate(self.recentModels): + icon = util.newIcon("Model") + action = QtWidgets.QAction( + icon, + f"&【{m['model_name']}】 {osp.basename(m['param_path'])}", + self, + ) + action.triggered.connect( + partial(self.setModelParam, m["model_name"], m["param_path"]) + ) + menu.addAction(action) + if len(self.recentModels) == 0: + menu.addAction(self.tr("无近期模型记录")) + self.settings.setValue("recent_params", self.recentModels) + + def setModelParam(self, modelName, paramPath): + if self.changeModel(ModelsNick[modelName][1]): + self.comboModelSelect.setCurrentText(self.tr(ModelsNick[modelName][0])) # 更改显示 + res = self.changeParam(paramPath) + if res: + return True + return False + + def changeModel(self, idx: int or str): + success, res = self.controller.setModel(MODELS[idx].__name__) + if not success: + self.warnException(res) + return False + return True + + def changeParam(self, param_path: str = None): + if not self.controller.modelSet: + self.warn(self.tr("选择模型结构"), self.tr("尚未选择模型结构,请在右侧下拉菜单进行选择!")) + return + if not param_path: + filters = self.tr("Paddle模型权重文件(*.pdparams)") + start_path = ( + "." 
+ if len(self.recentModels) == 0 + else osp.dirname(self.recentModels[-1]["param_path"]) + ) + param_path, _ = QtWidgets.QFileDialog.getOpenFileName( + self, + self.tr("选择模型参数") + " - " + __APPNAME__, + start_path, + filters, + ) + if not param_path: + return False + + success, res = self.controller.setParam(param_path) + if success: + model_dict = { + "param_path": param_path, + "model_name": self.controller.modelName, + } + if model_dict not in self.recentModels: + self.recentModels.append(model_dict) + if len(self.recentModels) > 10: + del self.recentModels[0] + self.settings.setValue("recent_models", self.recentModels) + # self.status = self.ANNING + return True + else: + self.warnException(res) + return False + + def loadRecentModelParam(self): + if len(self.recentModels) == 0: + self.statusbar.showMessage(self.tr("没有最近使用模型信息,请加载模型"), 10000) + return + m = self.recentModels[-1] + model = m["model_name"] + param_path = m["param_path"] + self.setModelParam(model, param_path) + + # 标签列表 + def loadLabelList(self, file_path=None): + if file_path is None: + filters = self.tr("标签配置文件") + " (*.txt)" + file_path, _ = QtWidgets.QFileDialog.getOpenFileName( + self, + self.tr("选择标签配置文件路径") + " - " + __APPNAME__, + ".", + filters, + ) + if not osp.exists(file_path): + return + labelJson = open(file_path, "r").read() + self.controller.readLabel(file_path) + self.refreshLabelList() + self.settings.setValue("label_list_file", file_path) + + def saveLabelList(self, auto_save_path=None): + if len(self.controller.labelList) == 0: + self.warn(self.tr("没有需要保存的标签"), self.tr("请先添加标签之后再进行保存!")) + return + if auto_save_path is None: + filters = self.tr("标签配置文件") + "(*.txt)" + dlg = QtWidgets.QFileDialog(self, self.tr("保存标签配置文件"), ".", filters) + dlg.setDefaultSuffix("txt") + dlg.setAcceptMode(QtWidgets.QFileDialog.AcceptSave) + dlg.setOption(QtWidgets.QFileDialog.DontConfirmOverwrite, False) + dlg.setOption(QtWidgets.QFileDialog.DontUseNativeDialog, False) + savePath, _ = dlg.getSaveFileName( + self, self.tr("选择保存标签配置文件路径") + " - " + __APPNAME__, ".", filters + ) + else: + savePath = auto_save_path + self.controller.saveLabel(savePath) + if auto_save_path is None: + self.settings.setValue("label_list_file", savePath) + + def addLabel(self): + c = self.colorMap.get_color() + table = self.labelListTable + idx = table.rowCount() + table.insertRow(table.rowCount()) + self.controller.addLabel(idx + 1, "", c) + numberItem = QTableWidgetItem(str(idx + 1)) + numberItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 0, numberItem) + table.setItem(idx, 1, QTableWidgetItem()) + colorItem = QTableWidgetItem() + colorItem.setBackground(QtGui.QColor(c[0], c[1], c[2])) + colorItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 2, colorItem) + delItem = QTableWidgetItem() + delItem.setIcon(util.newIcon("Clear")) + delItem.setTextAlignment(Qt.AlignCenter) + delItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 3, delItem) + self.adjustTableSize() + self.labelListClicked(self.labelListTable.rowCount() - 1, 0) + + def adjustTableSize(self): + self.labelListTable.horizontalHeader().setDefaultSectionSize(25) + self.labelListTable.horizontalHeader().setSectionResizeMode( + 0, QtWidgets.QHeaderView.Fixed + ) + self.labelListTable.horizontalHeader().setSectionResizeMode( + 3, QtWidgets.QHeaderView.Fixed + ) + self.labelListTable.horizontalHeader().setSectionResizeMode( + 2, QtWidgets.QHeaderView.Fixed + ) + self.labelListTable.setColumnWidth(2, 50) + + def clearLabelList(self): + if 
len(self.controller.labelList) == 0: + return True + res = self.warn( + self.tr("清空标签列表?"), + self.tr("请确认是否要清空标签列表"), + QMessageBox.Yes | QMessageBox.Cancel, + ) + if res == QMessageBox.Cancel: + return False + self.controller.labelList.clear() + if self.controller: + self.controller.label_list = [] + self.controller.curr_label_number = 0 + self.labelListTable.clear() + self.labelListTable.setRowCount(0) + return True + + def refreshLabelList(self): + table = self.labelListTable + table.clearContents() + table.setRowCount(len(self.controller.labelList)) + table.setColumnCount(4) + for idx, lab in enumerate(self.controller.labelList): + numberItem = QTableWidgetItem(str(lab.idx)) + numberItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 0, numberItem) + table.setItem(idx, 1, QTableWidgetItem(lab.name)) + c = lab.color + colorItem = QTableWidgetItem() + colorItem.setBackground(QtGui.QColor(c[0], c[1], c[2])) + colorItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 2, colorItem) + delItem = QTableWidgetItem() + delItem.setIcon(util.newIcon("Clear")) + delItem.setTextAlignment(Qt.AlignCenter) + delItem.setFlags(QtCore.Qt.ItemIsEnabled) + table.setItem(idx, 3, delItem) + self.adjustTableSize() + + cols = [0, 1, 3] + for idx in cols: + table.resizeColumnToContents(idx) + self.adjustTableSize() + + def labelListDoubleClick(self, row, col): + if col != 2: + return + table = self.labelListTable + color = QtWidgets.QColorDialog.getColor() + if color.getRgb() == (0, 0, 0, 255): + return + table.item(row, col).setBackground(color) + self.controller.labelList[row].color = color.getRgb()[:3] + if self.controller: + self.controller.label_list = self.controller.labelList + for p in self.scene.polygon_items: + color = self.controller.labelList.getLabelById(p.labelIndex).color + p.setColor(color, color) + self.labelListClicked(row, 0) + + @property + def currLabelIdx(self): + return self.controller.curr_label_number - 1 + + def labelListClicked(self, row, col): + table = self.labelListTable + if col == 3: + labelIdx = int(table.item(row, 0).text()) + self.controller.labelList.remove(labelIdx) + table.removeRow(row) + + if col == 0 or col == 1: + for cl in range(2): + for idx in range(len(self.controller.labelList)): + table.item(idx, cl).setBackground(QtGui.QColor(255, 255, 255)) + table.item(row, cl).setBackground(QtGui.QColor(48, 140, 198)) + table.item(row, 0).setSelected(True) + if self.controller: + self.controller.setCurrLabelIdx(int(table.item(row, 0).text())) + self.controller.label_list = self.controller.labelList + + def labelListItemChanged(self, row, col): + self.colorMap.usedColors = self.controller.labelList.colors + try: + if col == 1: + name = self.labelListTable.item(row, col).text() + self.controller.labelList[row].name = name + except: + pass + + def delActivePolygon(self): + for idx, polygon in enumerate(self.scene.polygon_items): + if polygon.hasFocus(): + res = self.warn( + self.tr("确认删除?"), + self.tr("确认要删除当前选中多边形标注?"), + QMessageBox.Yes | QMessageBox.Cancel, + ) + if res == QMessageBox.Yes: + self.delPolygon(polygon) + + def delPolygon(self, polygon): + polygon.remove() + if self.save_status["coco"]: + if polygon.coco_id: + self.coco.delAnnotation( + polygon.coco_id, + self.coco.imgNameToId[osp.basename(self.imagePath)], + ) + self.setDirty() + + def delActivePoint(self): + for polygon in self.scene.polygon_items: + polygon.removeFocusPoint() + + # 图片/标签 io + def getMask(self): + if not self.controller or self.controller.image is None: + return + s = 
self.controller.image.shape + img = np.zeros([s[0], s[1]]) + # 覆盖顺序,从上往下 + len_lab = self.labelListTable.rowCount() + for i in range(len_lab): + idx = int(self.labelListTable.item(len_lab - i - 1, 0).text()) + color = self.controller.labelList.getLabelById(idx).color + for poly in self.scene.polygon_items: + if poly.labelIndex == idx: + pts = np.int32([np.array(poly.scnenePoints)]) + cv2.fillPoly(img, pts=pts, color=idx) + return img + + def openRecentImage(self, file_path): + self.queueEvent(partial(self.loadImage, file_path)) + self.listFiles.addItems([file_path.replace("\\", "/")]) + self.imagePaths.append(file_path) + + def openImage(self): + formats = [ + "*.{}".format(fmt.data().decode()) + for fmt in QtGui.QImageReader.supportedImageFormats() + ] + filters = "Image & Label files (%s)" % " ".join(formats) + + recentPath = self.settings.value("recent_files", []) + if len(recentPath) == 0: + recentPath = "." + else: + recentPath = osp.dirname(recentPath[-1]) + + file_path, _ = QtWidgets.QFileDialog.getOpenFileName( + self, + self.tr("选择待标注图片") + " - " + __APPNAME__, + recentPath, + filters, + ) + if len(file_path) == 0: + return + self.saveImage(True) # 清除 + self.queueEvent(partial(self.loadImage, file_path)) + self.listFiles.addItems([file_path.replace("\\", "/")]) + self.imagePaths.append(file_path) + + def openFolder(self): + # 1. 选择文件夹 + recentPath = self.settings.value("recent_files", []) + if len(recentPath) == 0: + recentPath = "." + else: + recentPath = osp.dirname(recentPath[-1]) + self.inputDir = QtWidgets.QFileDialog.getExistingDirectory( + self, + self.tr("选择待标注图片文件夹") + " - " + __APPNAME__, + recentPath, + QtWidgets.QFileDialog.ShowDirsOnly + | QtWidgets.QFileDialog.DontResolveSymlinks, + ) + if len(self.inputDir) == 0: + return + + # 2. 关闭当前图片,清空文件列表 + self.saveImage(close=True) + self.imagePaths = [] + self.listFiles.clear() + + # 3. 扫描文件夹下所有图片 + # 3.1 获取所有文件名 + imagePaths = os.listdir(self.inputDir) + exts = QtGui.QImageReader.supportedImageFormats() + imagePaths = [n for n in imagePaths if n.split(".")[-1] in exts] + if len(imagePaths) == 0: + return + # 3.2 设置默认输出路径为文件夹下的 label 文件夹 + opd = osp.join(self.inputDir, "label") + self.outputDir = opd + if not osp.exists(opd): + os.makedirs(opd) + # 3.3 有重名标签都保留原来拓展名 + names = [] + for name in imagePaths: + name = osp.splitext(name)[0] + if name not in names: + names.append(name) + else: + self.toggleOrigExt(True) + imagePaths = [osp.join(self.inputDir, n) for n in imagePaths] + for p in imagePaths: + if p not in self.imagePaths: + self.imagePaths.append(p) + self.listFiles.addItem(p.replace("\\", "/")) + + # 3.4 加载已有的标注 + if self.outputDir is not None and osp.exists(self.outputDir): + self.changeOutputDir(self.outputDir) + if len(self.imagePaths) != 0: + self.currIdx = 0 + self.turnImg(0) + + def loadImage(self, path): + if not path or not osp.exists(path): + return + _, ext = os.path.splitext(path) + # 1. 读取图片 + image = cv2.imdecode(np.fromfile(path, dtype=np.uint8), 1) + image = image[:, :, ::-1] # BGR转RGB + self.image = image + self.controller.setImage(image) + self.updateImage(True) + + # 2. 加载标签 + self.loadLabel(path) + self.addRecentFile(path) + self.imagePath = path + # self.status = self.ANNING + + def loadLabel(self, imgPath): + if imgPath == "": + return None + + # 1. 
读取json格式标签 + if self.save_status["json"]: + + def getName(path): + return osp.splitext(osp.basename(path))[0] + + imgName = getName(imgPath) + labelPath = None + for path in self.labelPaths: + if not path.endswith(".json"): + continue + if self.origExt: + if getName(path) == osp.basename(imgPath): + labelPath = path + break + else: + if getName(path) == imgName: + labelPath = path + break + if not labelPath: + return + + labels = json.loads(open(labelPath, "r").read()) + + for label in labels: + color = label["color"] + labelIdx = label["labelIdx"] + points = label["points"] + poly = PolygonAnnotation( + labelIdx, + self.controller.image.shape, + self.delPolygon, + color, + color, + self.opacity, + ) + self.scene.addItem(poly) + self.scene.polygon_items.append(poly) + for p in points: + poly.addPointLast(QtCore.QPointF(p[0], p[1])) + + # 2. 读取coco格式标签 + if self.save_status["coco"]: + imgId = self.coco.imgNameToId.get(osp.basename(imgPath), None) + if imgId is None: + return + anns = self.coco.imgToAnns[imgId] + for ann in anns: + xys = ann["segmentation"][0] + points = [] + for idx in range(0, len(xys), 2): + points.append([xys[idx], xys[idx + 1]]) + labelIdx = ann["category_id"] + color = self.controller.labelList.getLabelById(labelIdx).color + poly = PolygonAnnotation( + ann["category_id"], + self.controller.image.shape, + self.delPolygon, + color, + color, + self.opacity, + ann["id"], + ) + self.scene.addItem(poly) + self.scene.polygon_items.append(poly) + for p in points: + poly.addPointLast(QtCore.QPointF(p[0], p[1])) + + def turnImg(self, delta): + # 1. 检查是否有图可翻,保存标签 + self.currIdx += delta + if self.currIdx >= len(self.imagePaths) or self.currIdx < 0: + self.currIdx -= delta + if delta == 1: + self.statusbar.showMessage(self.tr(f"没有后一张图片")) + else: + self.statusbar.showMessage(self.tr(f"没有前一张图片")) + self.saveImage(False) + return + else: + self.saveImage(True) + + # 2. 
打开新图 + self.loadImage(self.imagePaths[self.currIdx]) + self.listFiles.setCurrentRow(self.currIdx) + self.setClean() + + def imageListClicked(self): + if not self.controller: + self.warn(self.tr("模型未加载"), self.tr("尚未加载模型,请先加载模型!")) + self.changeParam() + if not self.controller: + return + if self.controller.is_incomplete_mask: + self.saveLabel() + toRow = self.listFiles.currentRow() + delta = toRow - self.currIdx + self.turnImg(delta) + + def finishObject(self): + if not self.controller or self.image is None: + return + current_mask, curr_polygon = self.controller.finishObject() + if curr_polygon is not None: + self.updateImage() + if current_mask is not None: + color = self.controller.labelList[self.currLabelIdx].color + for points in curr_polygon: + if len(points) < 3: + continue + poly = PolygonAnnotation( + self.controller.labelList[self.currLabelIdx].idx, + self.controller.image.shape, + self.delPolygon, + color, + color, + self.opacity, + ) + poly.labelIndex = self.controller.labelList[self.currLabelIdx].idx + self.scene.addItem(poly) + self.scene.polygon_items.append(poly) + for p in points: + poly.addPointLast(QtCore.QPointF(p[0], p[1])) + self.setDirty() + # 状态改变 + if self.status == self.EDITING: + self.anning = True + for p in self.scene.polygon_items: + p.setAnning(isAnning=True) + else: + self.anning = False + for p in self.scene.polygon_items: + p.setAnning(isAnning=False) + self.getMask() + + def completeLastMask(self): + # 返回最后一个标签是否完成,false就是还有带点的 + if not self.controller or self.controller.image is None: + return True + if not self.controller.is_incomplete_mask: + return True + res = self.warn( + self.tr("完成最后一个目标?"), + self.tr("是否完成最后一个目标的标注,不完成不会进行保存。"), + QMessageBox.Yes | QMessageBox.Cancel, + ) + if res == QMessageBox.Yes: + self.finishObject() + self.setDirty() + return True + return False + + def saveImage(self, close=False): + if self.controller and self.controller.image is not None: + # 1. 完成正在交互式标注的标签 + self.completeLastMask() + # 2. 进行保存 + if self.isDirty: + if self.actions.auto_save.isChecked(): + self.saveLabel() + else: + res = self.warn( + self.tr("保存标签?"), + self.tr("标签尚未保存,是否保存标签"), + QMessageBox.Yes | QMessageBox.Cancel, + ) + if res == QMessageBox.Yes: + self.saveLabel() + self.setClean() + if close: + # 3. 清空多边形标注,删掉图片 + for p in self.scene.polygon_items[::-1]: + p.remove() + self.scene.polygon_items = [] + self.controller.resetLastObject() + self.updateImage() + self.controller.image = None + if close: + self.annImage.setPixmap(QPixmap()) + + def saveLabel(self, saveAs=False, savePath=None): + # 1. 需要处于标注状态 + if not self.controller or self.controller.image is None: + return + # 2. 完成正在交互式标注的标签 + self.completeLastMask() + # 3. 
确定保存路径 + # 3.1 如果参数指定了保存路径直接存到savePath + if not savePath: + if not saveAs and self.outputDir is not None: + # 3.2 指定了标签文件夹,而且不是另存为:根据标签文件夹和文件名出保存路径 + name, ext = osp.splitext(osp.basename(self.imagePath)) + if not self.origExt: + ext = ".png" + savePath = osp.join( + self.outputDir, + name + ext, + ) + else: + # 3.3 没有指定标签存到哪,或者是另存为:弹框让用户选 + formats = [ + "*.{}".format(fmt.data().decode()) + for fmt in QtGui.QImageReader.supportedImageFormats() + ] + filters = "Label file (%s)" % " ".join(formats) + dlg = QtWidgets.QFileDialog( + self, + self.tr("保存标签文件路径"), + osp.dirname(self.imagePath), + filters, + ) + dlg.setDefaultSuffix("png") + dlg.setAcceptMode(QtWidgets.QFileDialog.AcceptSave) + dlg.setOption(QtWidgets.QFileDialog.DontConfirmOverwrite, False) + dlg.setOption(QtWidgets.QFileDialog.DontUseNativeDialog, False) + savePath, _ = dlg.getSaveFileName( + self, + self.tr("选择标签文件保存路径"), + osp.splitext(osp.basename(self.imagePath))[0] + ".png", + ) + if savePath is None or not osp.exists(osp.dirname(savePath)): + return + + if savePath not in self.labelPaths: + self.labelPaths.append(savePath) + + # 4.1 保存灰度图 + if self.save_status["gray_scale"]: + ext = osp.splitext(savePath)[1] + cv2.imencode(ext, self.getMask())[1].tofile(savePath) + # self.labelPaths.append(savePath) + + # 4.2 保存伪彩色 + if self.save_status["pseudo_color"]: + pseudoPath, ext = osp.splitext(savePath) + pseudoPath = pseudoPath + "_pseudo" + ext + s = self.controller.imgShape + pseudo = np.zeros([s[1], s[0], 3]) + mask = self.getMask() + for lab in self.controller.labelList: + pseudo[mask == lab.idx, :] = lab.color[::-1] + cv2.imencode(ext, pseudo)[1].tofile(pseudoPath) + + # 4.3 保存前景抠图 + if self.save_status["foreground"]: + mattingPath, ext = osp.splitext(savePath) + mattingPath = mattingPath + "_foreground" + ext + img = self.controller.image.copy() + img = img[:, :, ::-1] + img[self.getMask() == 0] = self.mattingBackground[::-1] + cv2.imencode(ext, img)[1].tofile(mattingPath) + + # 4.4 保存json + if self.save_status["json"]: + polygons = self.scene.polygon_items + labels = [] + for polygon in polygons: + l = self.controller.labelList[polygon.labelIndex - 1] + label = { + "name": l.name, + "labelIdx": l.idx, + "color": l.color, + "points": [], + } + for p in polygon.scnenePoints: + label["points"].append(p) + labels.append(label) + if self.origExt: + jsonPath = savePath + ".json" + else: + jsonPath = osp.splitext(savePath)[0] + ".json" + open(jsonPath, "w", encoding="utf-8").write(json.dumps(labels)) + self.labelPaths.append(jsonPath) + + # 4.5 保存coco + if self.save_status["coco"]: + if not self.coco.hasImage(osp.basename(self.imagePath)): + s = self.controller.imgShape + imgId = self.coco.addImage(osp.basename(self.imagePath), s[0], s[1]) + else: + imgId = self.coco.imgNameToId[osp.basename(self.imagePath)] + for polygon in self.scene.polygon_items: + points = [] + for p in polygon.scnenePoints: + for val in p: + points.append(val) + + if not polygon.coco_id: + annId = self.coco.addAnnotation(imgId, polygon.labelIndex, points, polygon.bbox.to_array()) + polygon.coco_id = annId + else: + self.coco.updateAnnotation(polygon.coco_id, imgId, points, polygon.bbox.to_array()) + for lab in self.controller.labelList: + if self.coco.hasCat(lab.idx): + self.coco.updateCategory(lab.idx, lab.name, lab.color) + else: + self.coco.addCategory(lab.idx, lab.name, lab.color) + saveDir = ( + self.outputDir if self.outputDir is not None else osp.dirname(savePath) + ) + cocoPath = osp.join(saveDir, "coco.json") + open(cocoPath, "w", 
encoding="utf-8").write(json.dumps(self.coco.dataset)) + + self.setClean() + self.statusbar.showMessage(self.tr("标签成功保存至") + " " + savePath, 5000) + + def setClean(self): + self.isDirty = False + + def setDirty(self): + self.isDirty = True + + def changeOutputDir(self, outputDir=None): + # 1. 弹框选择标签路径 + if outputDir is None: + outputDir = QtWidgets.QFileDialog.getExistingDirectory( + self, + self.tr("选择标签保存路径") + " - " + __APPNAME__, + self.settings.value("output_dir", "."), + QtWidgets.QFileDialog.ShowDirsOnly + | QtWidgets.QFileDialog.DontResolveSymlinks, + ) + if len(outputDir) == 0 or not osp.exists(outputDir): + return False + self.settings.setValue("output_dir", outputDir) + self.outputDir = outputDir + + # 2. 加载标签 + # 2.1 如果保存coco格式,加载coco标签 + if self.save_status["coco"]: + self.loadCoco() + + # 2.2 如果保存json格式,获取所有json文件名 + if self.save_status["json"]: + labelPaths = os.listdir(outputDir) + labelPaths = [n for n in labelPaths if n.endswith(".json")] + labelPaths = [osp.join(outputDir, n) for n in labelPaths] + self.labelPaths = labelPaths + + # 加载对应的标签列表 + lab_auto_save = osp.join(self.outputDir, "autosave_label.txt") + if osp.exists(lab_auto_save) == False: + lab_auto_save = osp.join(self.outputDir, "label/autosave_label.txt") + if osp.exists(lab_auto_save): + try: + self.loadLabelList(lab_auto_save) + except: + pass + return True + + def maskOpacityChanged(self): + self.sldOpacity.textLab.setText(str(self.opacity)) + if not self.controller or self.controller.image is None: + return + for polygon in self.scene.polygon_items: + polygon.setOpacity(self.opacity) + self.updateImage() + + def clickRadiusChanged(self): + self.sldClickRadius.textLab.setText(str(self.clickRadius)) + if not self.controller or self.controller.image is None: + return + self.updateImage() + + def threshChanged(self): + self.sldThresh.textLab.setText(str(self.segThresh)) + if not self.controller or self.controller.image is None: + return + self.controller.prob_thresh = self.segThresh + self.updateImage() + + def slideChanged(self): + self.sldMISlide.textLab.setText(str(self.slideMi)) + if not self.controller or self.controller.image is None: + return + self.midx = int(self.slideMi) - 1 + self.miSlideSet() + self.updateImage() + + def undoClick(self): + if self.image is None: + return + if not self.controller: + return + self.controller.undoClick() + self.updateImage() + if not self.controller.is_incomplete_mask: + self.setClean() + + def undoAll(self): + if not self.controller or self.controller.image is None: + return + self.controller.resetLastObject() + self.updateImage() + self.setClean() + + def redoClick(self): + if self.image is None: + return + if not self.controller: + return + self.controller.redoClick() + self.updateImage() + + def canvasClick(self, x, y, isLeft): + c = self.controller + if c.image is None: + return + if not c.inImage(x, y): + return + if not c.modelSet: + self.warn(self.tr("未选择模型", self.tr("尚未选择模型,请先在右上角选择模型"))) + return + if not c.paramSet: + self.warn(self.tr("未设置参数"), self.tr("尚未设置参数,请先在右上角设置参数")) + return + + if self.status == self.IDILE: + return + currLabel = self.controller.curr_label_number + if not currLabel or currLabel == 0: + self.warn(self.tr("未选择当前标签"), self.tr("请先在标签列表中单击点选标签")) + return + + self.controller.addClick(x, y, isLeft) + self.updateImage() + self.anning = True + + def updateImage(self, reset_canvas=False): + if not self.controller: + return + image = self.controller.get_visualization( + alpha_blend=self.opacity, + click_radius=self.clickRadius, + ) + 
height, width, _ = image.shape + bytesPerLine = 3 * width + image = QImage(image.data, width, height, bytesPerLine, QImage.Format_RGB888) + if reset_canvas: + self.resetZoom(width, height) + self.annImage.setPixmap(QPixmap(image)) + + def viewZoomed(self, scale): + self.scene.scale = scale + self.scene.updatePolygonSize() + + # 界面缩放重置 + def resetZoom(self, width, height): + # 每次加载图像前设定下当前的显示框,解决图像缩小后不在中心的问题 + self.scene.setSceneRect(0, 0, width, height) + # 缩放清除 + self.canvas.scale(1 / self.canvas.zoom_all, 1 / self.canvas.zoom_all) # 重置缩放 + self.canvas.zoom_all = 1 + # 最佳缩放 + s_eps = 5e-2 + scr_cont = [ + self.scrollArea.width() / width - s_eps, + self.scrollArea.height() / height - s_eps, + ] + if scr_cont[0] * height > self.scrollArea.height(): + self.canvas.zoom_all = scr_cont[1] + else: + self.canvas.zoom_all = scr_cont[0] + self.canvas.scale(self.canvas.zoom_all, self.canvas.zoom_all) + self.scene.scale = self.canvas.zoom_all + + def queueEvent(self, function): + QtCore.QTimer.singleShot(0, function) + + def toggleOrigExt(self, dst=None): + if dst: + self.origExt = dst + else: + self.origExt = not self.origExt + self.actions.origional_extension.setChecked(self.origExt) + + def toggleAutoSave(self, save): + if save and not self.outputDir: + self.changeOutputDir(None) + if save and not self.outputDir: + save = False + self.actions.auto_save.setChecked(save) + self.settings.setValue("auto_save", save) + + def toggleSave(self, type): + self.save_status[type] = not self.save_status[type] + if type == "coco" and self.save_status["coco"]: + self.loadCoco() + if type == "coco": + self.save_status["json"] = not self.save_status["coco"] + self.actions.save_json.setChecked(self.save_status["json"]) + if type == "json": + self.save_status["coco"] = not self.save_status["json"] + self.actions.save_coco.setChecked(self.save_status["coco"]) + + def loadCoco(self, coco_path=None): + if not coco_path: + if not self.outputDir or not osp.exists(self.outputDir): + coco_path = None + else: + coco_path = osp.join(self.outputDir, "coco.json") + # 这里放在外面判断可能会有coco_path为none,exists报错 + if not osp.exists(coco_path): + coco_path = None + self.coco = COCO(coco_path) + if self.clearLabelList(): + self.controller.labelList = util.LabelList(self.coco.dataset["categories"]) + self.refreshLabelList() + + def changeWorkerShow(self, index): + self.display_dockwidget[index] = bool(self.display_dockwidget[index] - 1) + self.toggleDockWidgets() + + + def toggleDockWidgets(self, is_init=False): + if is_init == True: + if self.dockStatus != []: + if len(self.dockStatus) != len(self.menus.showMenu): + self.settings.remove("dock_status") + else: + self.display_dockwidget = [strtobool(w) for w in self.dockStatus] + for i in range(len(self.menus.showMenu)): + self.menus.showMenu[i].setChecked(bool(self.display_dockwidget[i])) + else: + self.settings.setValue("dock_status", self.display_dockwidget) + for t, w in zip(self.display_dockwidget, self.dock_widgets): + if t == True: + w.show() + else: + w.hide() + + def update_bandList(self): + bands = self.rawimg.shape[-1] if len(self.rawimg.shape) == 3 else 1 + for i in range(len(self.bandCombos)): + self.bandCombos[i].currentIndexChanged.disconnect() + self.bandCombos[i].clear() + self.bandCombos[i].addItems([("band_" + str(j + 1)) for j in range(bands)]) + try: + self.bandCombos[i].setCurrentIndex(self.rsRGB[i]) + except IndexError: + pass + for bandCombo in self.bandCombos: + bandCombo.currentIndexChanged.connect(self.rsBandSet) # 设置波段 + + def toggleLargestCC(self, on): + try: + 
self.controller.filterLargestCC(on) + except: + pass + + @property + def opacity(self): + return self.sldOpacity.value() / 100 + + @property + def clickRadius(self): + return self.sldClickRadius.value() + + @property + def segThresh(self): + return self.sldThresh.value() / 100 + + @property + def slideMi(self): + return self.sldMISlide.value() + + def warnException(self, e): + e = str(e) + title = e.split("。")[0] + self.warn(title, e) + + def warn(self, title, text, buttons=QMessageBox.Yes): + msg = QMessageBox() + msg.setIcon(QMessageBox.Warning) + msg.setWindowTitle(title) + msg.setText(text) + msg.setStandardButtons(buttons) + return msg.exec_() + + @property + def status(self): + if not self.controller: + return self.IDILE + c = self.controller + if not c.paramSet or not c.modelSet or c.image is None: + return self.IDILE + if self.anning: + return self.ANNING + return self.EDITING + + # 加载界面 + def loadLayout(self): + self.restoreState(self.layoutStatus) + + def closeEvent(self, event): + # 保存界面 + self.settings.setValue("layout_status", QByteArray(self.saveState())) + self.settings.setValue( + "save_status", [(k, self.save_status[k]) for k in self.save_status.keys()] + ) + # 如果设置了保存路径,把标签也保存下 + if self.outputDir is not None and len(self.controller.labelList) != 0: + self.saveLabelList(osp.join(self.outputDir, "autosave_label.txt")) + # 关闭主窗体退出程序,子窗体也关闭 + sys.exit(0) diff --git a/EISeg/eiseg/config/colormap.txt b/EISeg/eiseg/config/colormap.txt new file mode 100644 index 0000000000..8583495612 --- /dev/null +++ b/EISeg/eiseg/config/colormap.txt @@ -0,0 +1,20 @@ +53,119,181 +245,128,6 +67,159,36 +204,43,41 +145,104,190 +135,86,75 +219,120,195 +127,127,127 +187,189,18 +72,190,207 +178,199,233 +248,187,118 +160,222,135 +247,153,150 +195,176,214 +192,156,148 +241,183,211 +199,199,199 +218,219,139 +166,218,229 \ No newline at end of file diff --git a/EISeg/eiseg/config/config.yaml b/EISeg/eiseg/config/config.yaml new file mode 100644 index 0000000000..49580fe855 --- /dev/null +++ b/EISeg/eiseg/config/config.yaml @@ -0,0 +1,36 @@ +shortcut: + about: Q + auto_save: X + change_output_dir: Shift+Z + clear: Ctrl+Shift+Z + clear_label: '' + clear_recent: '' + close: Ctrl+W + data_worker: '' + del_active_polygon: Backspace + edit_shortcuts: E + finish_object: Space + grid_ann: '' + label_worker: '' + largest_component: '' + load_label: '' + load_param: Ctrl+M + medical_worker: '' + model_worker: '' + open_folder: Shift+A + open_image: Ctrl+A + origional_extension: '' + quick_start: '' + quit: '' + redo: Ctrl+Y + remote_worker: '' + save: '' + save_as: '' + save_coco: '' + save_json: '' + save_label: '' + save_pseudo: '' + set_worker: '' + turn_next: F + turn_prev: S + undo: Ctrl+Z diff --git a/EISeg/eiseg/controller.py b/EISeg/eiseg/controller.py new file mode 100644 index 0000000000..5bfeb0fdfb --- /dev/null +++ b/EISeg/eiseg/controller.py @@ -0,0 +1,405 @@ +import time +import json +import cv2 +import numpy as np +from skimage.measure import label + +from inference import clicker +from inference.predictor import get_predictor +import util +from util.vis import draw_with_blend_and_clicks +from util import MODELS, LabelList + + +class InteractiveController: + def __init__( + self, + predictor_params: dict = None, + prob_thresh: float = 0.5, + ): + """初始化控制器. 
+ + Parameters + ---------- + predictor_params : dict + 推理器配置 + prob_thresh : float + 区分前景和背景结果的阈值 + + """ + self.predictor_params = predictor_params + self.prob_thresh = prob_thresh + self.model = None + self.image = None + self.predictor = None + self.clicker = clicker.Clicker() + self.states = [] + self.probs_history = [] + self.polygons = [] + + # 用于redo + self.undo_states = [] + self.undo_probs_history = [] + + self.curr_label_number = 0 + self._result_mask = None + self.labelList = LabelList() + self.lccFilter = False + + def filterLargestCC(self, do_filter: bool): + """设置是否只保留推理结果中的最大联通块 + + Parameters + ---------- + do_filter : bool + 是否只保存推理结果中的最大联通块 + """ + if not isinstance(do_filter, bool): + return + self.lccFilter = do_filter + + def setModel(self, modelName: str): + """设置推理其模型. + + Parameters + ---------- + modelName : str + 模型名称,模型类中的__name__属性 + + Returns + ------- + bool, str + 是否成功设置模型, 失败原因 + + """ + if not isinstance(modelName, str): + return False, "模型名应为str类型" + try: + self.model = MODELS[modelName]() + except KeyError as e: + return False, str(e) + return True, "模型设置成功" + + def setParam(self, paramPath: str): + """设置模型使用的推理参数 + + Parameters + ---------- + paramPath : str + 推理参数路径 + + Returns + ------- + bool, str + 是否设置成功, 失败原因 + + """ + if not self.modelSet: + return False, "模型未设置,请先设置模型" + try: + self.model.load_param(paramPath) + except Exception as e: + return False, str(e) + return True, "权重设置成功" + + def setImage(self, image: np.array): + """设置当前标注的图片 + + Parameters + ---------- + image : np.array + 当前标注的图片 + + """ + self.image = image + self._result_mask = np.zeros(image.shape[:2], dtype=np.uint8) + self.resetLastObject() + + # 标签操作 + def setLabelList(self, labelList: json): + """设置标签列表,会覆盖已有的标签列表 + + Parameters + ---------- + labelList : json + 标签列表格式为 + { + { + "idx" : int (like 0 or 1 or 2) + "name" : str (like "car" or "airplan") + "color" : list (like [255, 0, 0]) + }, + ... + } + + Returns + ------- + type + Description of returned object. + + """ + self.labelList.clear() + labels = json.loads(labelList) + for lab in labels: + self.labelList.add(lab["id"], lab["name"], lab["color"]) + + def addLabel(self, id: int, name: str, color: list): + self.labelList.add(id, name, color) + + def delLabel(self, id: int): + self.labelList.remove(id) + + def clearLabel(self): + self.labelList.clear() + + def readLabel(self, path): + self.labelList.readLabel(path) + + def saveLabel(self, path): + self.labelList.saveLabel(path) + + # 点击操作 + def addClick(self, x: int, y: int, is_positive: bool): + """添加一个点并运行推理,保存历史用于undo + + Parameters + ---------- + x : int + 点击的横坐标 + y : int + 点击的纵坐标 + is_positive : bool + 是否点的是正点 + + Returns + ------- + bool, str + 点击是否添加成功, 失败原因 + + """ + + # 1. 确定可以点 + if not self.inImage(x, y): + return False, "点击越界" + if not self.modelSet: + return False, "模型未设置" + if not self.paramSet: + return False, "参数未设置" + if not self.imageSet: + return False, "图像未设置" + + if len(self.states) == 0: # 保存一个空状态 + self.states.append( + { + "clicker": self.clicker.get_state(), + "predictor": self.predictor.get_states(), + } + ) + + # 2. 添加点击,跑推理 + click = clicker.Click(is_positive=is_positive, coords=(y, x)) + self.clicker.add_click(click) + pred = self.predictor.get_prediction(self.clicker) + + # 3. 
保存状态 + self.states.append( + { + "clicker": self.clicker.get_state(), + "predictor": self.predictor.get_states(), + } + ) + if self.probs_history: + self.probs_history.append((self.probs_history[-1][1], pred)) + else: + self.probs_history.append((np.zeros_like(pred), pred)) + + # 点击之后就不能接着之前的历史redo了 + self.undo_states = [] + self.undo_probs_history = [] + return True, "点击添加成功" + + def undoClick(self): + """ + undo一步点击 + """ + if len(self.states) <= 1: # == 1就只剩下一个空状态了,不用再退 + return + self.undo_states.append(self.states.pop()) + self.clicker.set_state(self.states[-1]["clicker"]) + self.predictor.set_states(self.states[-1]["predictor"]) + self.undo_probs_history.append(self.probs_history.pop()) + if not self.probs_history: + self.reset_init_mask() + + def redoClick(self): + """ + redo一步点击 + """ + if len(self.undo_states) == 0: # 如果还没撤销过 + return + if len(self.undo_probs_history) >= 1: + next_state = self.undo_states.pop() + self.states.append(next_state) + self.clicker.set_state(next_state["clicker"]) + self.predictor.set_states(next_state["predictor"]) + self.probs_history.append(self.undo_probs_history.pop()) + + def finishObject(self): + """ + 结束当前物体标注,准备标下一个 + """ + object_prob = self.current_object_prob + if object_prob is None: + return None, None + object_mask = object_prob > self.prob_thresh + polygon = util.get_polygon(object_mask.astype(np.uint8) * 255) + if polygon is not None: + if self.lccFilter: + object_mask = self.getLargestCC(object_mask) + self._result_mask[object_mask] = self.curr_label_number + self.resetLastObject() + self.polygons.append([self.curr_label_number, polygon]) + return object_mask, polygon + + # 多边形 + def getPolygon(self): + return self.polygon + + def setPolygon(self, polygon): + self.polygon = polygon + + # mask + def getMask(self): + s = self.imgShape + img = np.zeros([s[0], s[1]]) + for poly in self.polygons: + pts = np.int32([np.array(poly[1])]) + cv2.fillPoly(img, pts=pts, color=poly[0]) + return img + + def setCurrLabelIdx(self, number): + if not isinstance(number, int): + return False + self.curr_label_number = number + + def resetLastObject(self, update_image=True): + """ + 重置控制器状态 + Parameters + update_image(bool): 是否更新图像 + """ + self.states = [] + self.probs_history = [] + self.undo_states = [] + self.undo_probs_history = [] + # self.current_object_prob = None + self.clicker.reset_clicks() + self.reset_predictor() + self.reset_init_mask() + + def reset_predictor(self, predictor_params=None): + """ + 重置推理器,可以换推理配置 + Parameters + predictor_params(dict): 推理配置 + """ + if predictor_params is not None: + self.predictor_params = predictor_params + self.predictor = get_predictor(self.model.model, **self.predictor_params) + if self.image is not None: + self.predictor.set_input_image(self.image) + + def reset_init_mask(self): + self.clicker.click_indx_offset = 0 + + def getLargestCC(self, mask): + mask = label(mask) + if mask.max() == 0: + return mask + mask = mask == np.argmax(np.bincount(mask.flat)[1:]) + 1 + return mask + + def get_visualization(self, alpha_blend: float, click_radius: int): + if self.image is None: + return None + # 1. 
正在标注的mask + # results_mask_for_vis = self.result_mask # 加入之前标完的mask + results_mask_for_vis = np.zeros_like(self.result_mask) + results_mask_for_vis *= self.curr_label_number + if self.probs_history: + results_mask_for_vis[ + self.current_object_prob > self.prob_thresh + ] = self.curr_label_number + if self.lccFilter: + results_mask_for_vis = ( + self.getLargestCC(results_mask_for_vis) * self.curr_label_number + ) + vis = draw_with_blend_and_clicks( + self.image, + mask=results_mask_for_vis, + alpha=alpha_blend, + clicks_list=self.clicker.clicks_list, + radius=click_radius, + palette=self.palette, + ) + return vis + + def inImage(self, x: int, y: int): + s = self.image.shape + if x < 0 or y < 0 or x >= s[1] or y >= s[0]: + print("点击越界") + return False + return True + + @property + def result_mask(self): + result_mask = self._result_mask.copy() + return result_mask + + @property + def palette(self): + if self.labelList: + colors = [ml.color for ml in self.labelList] + colors.insert(0, [0, 0, 0]) + else: + colors = [[0, 0, 0]] + return colors + + @property + def current_object_prob(self): + """ + 获取当前推理标签 + """ + if self.probs_history: + _, current_prob_additive = self.probs_history[-1] + return current_prob_additive + else: + return None + + @property + def is_incomplete_mask(self): + """ + Returns + bool: 当前的物体是不是还没标完 + """ + return len(self.probs_history) > 0 + + @property + def imgShape(self): + return self.image.shape[1::-1] + + @property + def paramSet(self): + return self.model.paramSet + + @property + def modelSet(self): + return self.model is not None + + @property + def modelName(self): + return self.model.__name__ + + @property + def imageSet(self): + return self.image is not None diff --git a/EISeg/eiseg/exe.py b/EISeg/eiseg/exe.py new file mode 100644 index 0000000000..bea395435d --- /dev/null +++ b/EISeg/eiseg/exe.py @@ -0,0 +1,9 @@ +import os.path as osp +import sys + +sys.path.append(osp.dirname(osp.dirname(osp.realpath(__file__)))) + +from run import main + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/__init__.py b/EISeg/eiseg/inference/__init__.py similarity index 100% rename from contrib/EISeg/eiseg/data/__init__.py rename to EISeg/eiseg/inference/__init__.py diff --git a/contrib/EISeg/eiseg/inference/clicker.py b/EISeg/eiseg/inference/clicker.py similarity index 100% rename from contrib/EISeg/eiseg/inference/clicker.py rename to EISeg/eiseg/inference/clicker.py diff --git a/EISeg/eiseg/inference/predictor/__init__.py b/EISeg/eiseg/inference/predictor/__init__.py new file mode 100644 index 0000000000..7dc779840f --- /dev/null +++ b/EISeg/eiseg/inference/predictor/__init__.py @@ -0,0 +1,27 @@ +from .base import BasePredictor +from inference.transforms import ZoomIn + + +def get_predictor(net, brs_mode, + with_flip=True, + zoom_in_params=dict(), + predictor_params=None): + + predictor_params_ = { + 'optimize_after_n_clicks': 1 + } + + if zoom_in_params is not None: + zoom_in = ZoomIn(**zoom_in_params) + else: + zoom_in = None + + if brs_mode == 'NoBRS': + + if predictor_params is not None: + predictor_params_.update(predictor_params) + predictor = BasePredictor(net, zoom_in=zoom_in, with_flip=with_flip, **predictor_params_) + + else: + raise NotImplementedError('Just support NoBRS mode') + return predictor \ No newline at end of file diff --git a/contrib/EISeg/eiseg/inference/predictor/base.py b/EISeg/eiseg/inference/predictor/base.py similarity index 90% rename from contrib/EISeg/eiseg/inference/predictor/base.py 
rename to EISeg/eiseg/inference/predictor/base.py index b8d0024d29..44f4b794c3 100644 --- a/contrib/EISeg/eiseg/inference/predictor/base.py +++ b/EISeg/eiseg/inference/predictor/base.py @@ -1,4 +1,3 @@ -from numpy.lib.type_check import imag import paddle import paddle.nn.functional as F import numpy as np @@ -66,28 +65,24 @@ def get_prediction(self, clicker, prev_mask=None): if hasattr(self.net, 'with_prev_mask') and self.net.with_prev_mask: input_image = paddle.concat([input_image, prev_mask], axis=1) - # np.save('test_output/input_image.npy', input_image) image_nd, clicks_lists, is_image_changed = self.apply_transforms( input_image, [clicks_list] ) - # np.save('test_output/image_nd.npy', image_nd) pred_logits = self._get_prediction(image_nd, clicks_lists, is_image_changed) prediction = F.interpolate(pred_logits, mode='bilinear', align_corners=True, size=image_nd.shape[2:]) - # np.save('test_output/pred_logits.npy', pred_logits.numpy()) for t in reversed(self.transforms): prediction = t.inv_transform(prediction) - # np.save('test_output/prediction_inv_transform.npy', prediction.numpy()) + if self.zoom_in is not None and self.zoom_in.check_possible_recalculation(): return self.get_prediction(clicker) self.prev_prediction = prediction - # np.save('test_output/prediction.npy', prediction.numpy()) + return prediction.numpy()[0, 0] def _get_prediction(self, image_nd, clicks_lists, is_image_changed): points_nd = self.get_points_nd(clicks_lists) - # np.save('test_output/points_nd.npy', points_nd.numpy()) return self.net(image_nd, points_nd)['instances'] def _get_transform_states(self): @@ -102,7 +97,6 @@ def apply_transforms(self, image_nd, clicks_lists): is_image_changed = False for t in self.transforms: image_nd, clicks_lists = t.transform(image_nd, clicks_lists) - print("trans:", image_nd.shape, ' t:', t) is_image_changed |= t.image_changed return image_nd, clicks_lists, is_image_changed @@ -133,4 +127,4 @@ def get_states(self): def set_states(self, states): self._set_transform_states(states['transform_states']) - self.prev_prediction = states['prev_prediction'] + self.prev_prediction = states['prev_prediction'] \ No newline at end of file diff --git a/contrib/EISeg/eiseg/inference/transforms/__init__.py b/EISeg/eiseg/inference/transforms/__init__.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/__init__.py rename to EISeg/eiseg/inference/transforms/__init__.py diff --git a/contrib/EISeg/eiseg/inference/transforms/base.py b/EISeg/eiseg/inference/transforms/base.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/base.py rename to EISeg/eiseg/inference/transforms/base.py diff --git a/contrib/EISeg/eiseg/inference/transforms/crops.py b/EISeg/eiseg/inference/transforms/crops.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/crops.py rename to EISeg/eiseg/inference/transforms/crops.py diff --git a/contrib/EISeg/eiseg/inference/transforms/flip.py b/EISeg/eiseg/inference/transforms/flip.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/flip.py rename to EISeg/eiseg/inference/transforms/flip.py diff --git a/contrib/EISeg/eiseg/inference/transforms/limit_longest_side.py b/EISeg/eiseg/inference/transforms/limit_longest_side.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/limit_longest_side.py rename to EISeg/eiseg/inference/transforms/limit_longest_side.py diff --git a/contrib/EISeg/eiseg/inference/transforms/zoom_in.py 
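The `get_prediction` flow above relies on every transform (flip, crops, zoom-in) being invertible: inputs go through `t.transform(...)` in order, the logits are resized back to the transformed input size, and then `t.inv_transform(...)` is applied in reverse order. A toy sketch of that pattern with made-up transforms and a stand-in network; it is not the EISeg implementation, just the same forward/inverse bookkeeping on 2-D arrays.

```python
import numpy as np


class HFlip:
    """Invertible horizontal flip, following the predictor's transform protocol."""

    def transform(self, image):
        return image[:, ::-1]

    def inv_transform(self, prediction):
        return prediction[:, ::-1]


class Pad:
    """Pad height/width to a multiple of `multiple`, crop the prediction back."""

    def __init__(self, multiple=32):
        self.multiple = multiple
        self.orig_shape = None

    def transform(self, image):
        self.orig_shape = image.shape[:2]
        h, w = image.shape[:2]
        return np.pad(image, ((0, (-h) % self.multiple), (0, (-w) % self.multiple)))

    def inv_transform(self, prediction):
        h, w = self.orig_shape
        return prediction[:h, :w]


def predict(image, net, transforms):
    x = image
    for t in transforms:                 # apply every transform in order
        x = t.transform(x)
    pred = net(x)                        # stand-in for the network forward pass
    for t in reversed(transforms):       # undo in reverse order, as in base.py
        pred = t.inv_transform(pred)
    return pred


img = np.random.rand(37, 53)
out = predict(img, net=lambda x: x, transforms=[Pad(32), HFlip()])
assert out.shape == img.shape            # the inverse pass restores the original geometry
```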
b/EISeg/eiseg/inference/transforms/zoom_in.py similarity index 100% rename from contrib/EISeg/eiseg/inference/transforms/zoom_in.py rename to EISeg/eiseg/inference/transforms/zoom_in.py diff --git a/contrib/EISeg/eiseg/inference/__init__.py b/EISeg/eiseg/model/__init__.py similarity index 100% rename from contrib/EISeg/eiseg/inference/__init__.py rename to EISeg/eiseg/model/__init__.py diff --git a/contrib/EISeg/eiseg/model/is_hrnet_model.py b/EISeg/eiseg/model/is_hrnet_model.py similarity index 65% rename from contrib/EISeg/eiseg/model/is_hrnet_model.py rename to EISeg/eiseg/model/is_hrnet_model.py index 737c609b1d..e8bff90cd6 100644 --- a/contrib/EISeg/eiseg/model/is_hrnet_model.py +++ b/EISeg/eiseg/model/is_hrnet_model.py @@ -1,9 +1,6 @@ import paddle.nn as nn - -from util.serialization import serialize from .is_model import ISModel from .modeling.hrnet_ocr import HighResolutionNet -from model.modifiers import LRMult class HRNetModel(ISModel): @@ -13,12 +10,6 @@ def __init__(self, width=48, ocr_width=256, small=False, backbone_lr_mult=0.1, self.feature_extractor = HighResolutionNet(width=width, ocr_width=ocr_width, small=small, num_classes=1, norm_layer=norm_layer) - self.feature_extractor.apply(LRMult(backbone_lr_mult)) - if ocr_width > 0: - self.feature_extractor.ocr_distri_head.apply(LRMult(1.0)) - self.feature_extractor.ocr_gather_head.apply(LRMult(1.0)) - self.feature_extractor.conv3x3_ocr.apply(LRMult(1.0)) - def backbone_forward(self, image, coord_features=None): net_outputs = self.feature_extractor(image, coord_features) diff --git a/contrib/EISeg/eiseg/model/is_model.py b/EISeg/eiseg/model/is_model.py similarity index 92% rename from contrib/EISeg/eiseg/model/is_model.py rename to EISeg/eiseg/model/is_model.py index d34c53be87..57d2a92fca 100644 --- a/contrib/EISeg/eiseg/model/is_model.py +++ b/EISeg/eiseg/model/is_model.py @@ -59,24 +59,18 @@ def __init__(self, use_rgb_conv=True, with_aux_output=False, def forward(self, image, points): image, prev_mask = self.prepare_input(image) - # np.save('test_output/image.npy', image.numpy()) - # np.save('test_output/prev_mask.npy', prev_mask.numpy()) coord_features = self.get_coord_features(image, prev_mask, points) - # np.save('test_output/coord_features.npy', coord_features.numpy()) if self.rgb_conv is not None: x = self.rgb_conv(paddle.concat((image, coord_features), axis=1)) outputs = self.backbone_forward(x) else: coord_features = self.maps_transform(coord_features) - # np.save('test_output/coord_features.npy', coord_features.numpy()) outputs = self.backbone_forward(image, coord_features) outputs['instances'] = nn.functional.interpolate(outputs['instances'], size=image.shape[2:], mode='bilinear', align_corners=True) - # np.save('test_output/outputs_instances.npy', outputs['instances'].numpy()) if self.with_aux_output: outputs['instances_aux'] = nn.functional.interpolate(outputs['instances_aux'], size=image.shape[2:], mode='bilinear', align_corners=True) - # np.save('test_output/outputs_instances_aux.npy', outputs['instances_aux'].numpy()) return outputs def prepare_input(self, image): diff --git a/contrib/EISeg/eiseg/model/__init__.py b/EISeg/eiseg/model/modeling/__init__.py similarity index 100% rename from contrib/EISeg/eiseg/model/__init__.py rename to EISeg/eiseg/model/modeling/__init__.py diff --git a/contrib/EISeg/eiseg/model/modeling/hrnet_ocr.py b/EISeg/eiseg/model/modeling/hrnet_ocr.py similarity index 91% rename from contrib/EISeg/eiseg/model/modeling/hrnet_ocr.py rename to EISeg/eiseg/model/modeling/hrnet_ocr.py index 
1445c6b243..857fef9170 100644 --- a/contrib/EISeg/eiseg/model/modeling/hrnet_ocr.py +++ b/EISeg/eiseg/model/modeling/hrnet_ocr.py @@ -324,36 +324,20 @@ def _make_stage(self, block, num_inchannels, return nn.Sequential(*modules), num_inchannels def forward(self, x, additional_features=None): - # np.save('test_output/x.npy', x.numpy()) - # np.save('test_output/additional_features.npy', additional_features.numpy()) feats = self.compute_hrnet_feats(x, additional_features) - # np.save('test_output/feats.npy', feats.numpy()) if self.ocr_width > 0: out_aux = self.aux_head(feats) - # np.save('test_output/out_aux.npy', out_aux.numpy()) feats = self.conv3x3_ocr(feats) - # np.save('test_output/feats_conv3x3_ocr.npy', feats.numpy()) context = self.ocr_gather_head(feats, out_aux) - # np.save('test_output/context.npy', context.numpy()) feats = self.ocr_distri_head(feats, context) - # np.save('test_output/feats_ocr_distri_head.npy', feats.numpy()) -# print('feats', feats) out = self.cls_head(feats) - # np.save('test_output/out.npy', out.numpy()) -# for name, params in self.cls_head.named_parameters(): -# print(name) -# print(params) -# print('out', out) -# raise Exception("***************************") return [out, out_aux] else: return [self.cls_head(feats), None] def compute_hrnet_feats(self, x, additional_features): x = self.compute_pre_stage_features(x, additional_features) - # np.save('test_output/x_compute_pre_stage_features.npy', x.numpy()) x = self.layer1(x) - # np.save('test_output/x_layer1', x.numpy()) x_list = [] for i in range(self.stage2_num_branches): if self.transition1[i] is not None: @@ -361,7 +345,6 @@ def compute_hrnet_feats(self, x, additional_features): else: x_list.append(x) y_list = self.stage2(x_list) - # np.save('test_output/y_list_stage2_0.npy', y_list[0].numpy()) x_list = [] for i in range(self.stage3_num_branches): if self.transition2[i] is not None: @@ -372,7 +355,6 @@ def compute_hrnet_feats(self, x, additional_features): else: x_list.append(y_list[i]) y_list = self.stage3(x_list) - # np.save('test_output/y_list_stage3_0.npy', y_list[0].numpy()) x_list = [] for i in range(self.stage4_num_branches): if self.transition3[i] is not None: @@ -383,23 +365,16 @@ def compute_hrnet_feats(self, x, additional_features): else: x_list.append(y_list[i]) x = self.stage4(x_list) - # np.save('test_output/y_list_stage4_0.npy', y_list[0].numpy()) return self.aggregate_hrnet_features(x) def compute_pre_stage_features(self, x, additional_features): x = self.conv1(x) - # np.save('test_output/x_conv1.npy', x.numpy()) x = self.bn1(x) - # np.save('test_output/x_bn1.npy', x.numpy()) x = self.relu(x) - # np.save('test_output/x_relu.npy', x.numpy()) if additional_features is not None: x = x + additional_features - # np.save('test_output/x_add.npy', x.numpy()) x = self.conv2(x) - # np.save('test_output/x_conv2.npy', x.numpy()) x = self.bn2(x) - # np.save('test_output/x_bn2.npy', x.numpy()) return self.relu(x) def aggregate_hrnet_features(self, x): @@ -411,9 +386,6 @@ def aggregate_hrnet_features(self, x): mode='bilinear', align_corners=self.align_corners) x3 = F.interpolate(x[3], size=(x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) - # np.save('test_output/x1.npy', x1.numpy()) - # np.save('test_output/x2.npy', x2.numpy()) - # np.save('test_output/x3.npy', x3.numpy()) return paddle.concat([x[0], x1, x2, x3], axis=1) def load_pretrained_weights(self, pretrained_path=''): diff --git a/contrib/EISeg/eiseg/model/modeling/ocr.py b/EISeg/eiseg/model/modeling/ocr.py similarity index 100% rename 
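`aggregate_hrnet_features` above upsamples the three lower-resolution HRNet branches to the resolution of branch 0 with bilinear interpolation and concatenates them along the channel axis before the OCR heads. A minimal paddle sketch of that aggregation step; the branch shapes below are only illustrative (HRNet-W18-style widths 18/36/72/144 at strides 4/8/16/32).

```python
import paddle
import paddle.nn.functional as F


def aggregate(branches, align_corners=True):
    """Upsample every branch to the finest resolution and concat on channels."""
    h, w = branches[0].shape[2], branches[0].shape[3]
    ups = [branches[0]] + [
        F.interpolate(b, size=(h, w), mode="bilinear", align_corners=align_corners)
        for b in branches[1:]
    ]
    return paddle.concat(ups, axis=1)


# Illustrative multi-resolution feature maps for a 256x256 input.
feats = [paddle.rand([1, 18 * 2 ** i, 64 // 2 ** i, 64 // 2 ** i]) for i in range(4)]
print(aggregate(feats).shape)  # [1, 270, 64, 64]  (18 + 36 + 72 + 144 channels)
```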
from contrib/EISeg/eiseg/model/modeling/ocr.py rename to EISeg/eiseg/model/modeling/ocr.py diff --git a/contrib/EISeg/eiseg/model/modeling/resnetv1b.py b/EISeg/eiseg/model/modeling/resnetv1b.py similarity index 100% rename from contrib/EISeg/eiseg/model/modeling/resnetv1b.py rename to EISeg/eiseg/model/modeling/resnetv1b.py diff --git a/contrib/EISeg/eiseg/model/ops.py b/EISeg/eiseg/model/ops.py similarity index 98% rename from contrib/EISeg/eiseg/model/ops.py rename to EISeg/eiseg/model/ops.py index 4a34a157b8..26e837e980 100644 --- a/contrib/EISeg/eiseg/model/ops.py +++ b/EISeg/eiseg/model/ops.py @@ -1,7 +1,6 @@ import paddle import paddle.nn as nn import numpy as np -import model.initializer as initializer class DistMaps(nn.Layer): diff --git a/EISeg/eiseg/models.py b/EISeg/eiseg/models.py new file mode 100644 index 0000000000..02736f572e --- /dev/null +++ b/EISeg/eiseg/models.py @@ -0,0 +1,91 @@ +import os.path as osp +from abc import ABC, abstractmethod + +import paddle + +from model.is_hrnet_model import HRNetModel +from util import MODELS + +here = osp.dirname(osp.abspath(__file__)) + + +class EISegModel: + @abstractmethod + def __init__(self): + self.paramSet = False + try: + self.create_model() + except AssertionError: + ver = paddle.__version__ + if ver < "2.1.0": + raise Exception("模型创建失败。Paddle版本低于2.1.0,请升级paddlepaddle") + else: + raise Exception("模型创建失败。请参考官网教程检查Paddle安装是否正确,GPU版本请注意是否正确安装显卡驱动。") + + def load_param(self, param_path): + params = self.get_param(param_path) + if params: + try: + self.model.set_dict(params) + self.model.eval() + except: + raise Exception("权重设置失败。请参考官网教程检查Paddle安装是否正确,GPU版本请注意是否正确安装显卡驱动。") + self.paramSet = True + return True + else: + return None + + def get_param(self, param_path): + print("param_path", self.__name__, param_path) + if param_path is None or not osp.exists(param_path): + raise Exception(f"权重路径{param_path}不存在。请指定正确的模型路径") + params = paddle.load(param_path) + pkeys = params.keys() + mkeys = self.model.named_parameters() + if len(pkeys) != len(list(mkeys)): + raise Exception("权重和模型不匹配。请确保指定的权重和模型对应") + for p, m in zip(pkeys, mkeys): + if p != m[0]: + raise Exception("权重和模型不匹配。请确保指定的权重和模型对应") + return params + + +ModelsNick = {"HRNet18s_OCR48": ["轻量级模型", 0], + "HRNet18_OCR64": ["高精度模型", 1]} + +@MODELS.add_component +class HRNet18s_OCR48(EISegModel): + __name__ = "HRNet18s_OCR48" + + def create_model(self): + self.model = HRNetModel( + width=18, + ocr_width=48, + small=True, + with_aux_output=True, + use_rgb_conv=False, + use_leaky_relu=True, + use_disks=True, + with_prev_mask=True, + norm_radius=5, + cpu_dist_maps=False, + ) + + +@MODELS.add_component +class HRNet18_OCR64(EISegModel): + __name__ = "HRNet18_OCR64" + + def create_model(self): + self.model = HRNetModel( + width=18, + ocr_width=64, + small=False, + with_aux_output=True, + use_leaky_relu=True, + use_rgb_conv=False, + use_disks=True, + norm_radius=5, + with_prev_mask=True, + cpu_dist_maps=False, # 目前打包cython有些问题,先默认用False + ) diff --git a/EISeg/eiseg/resource/AutoSave.png b/EISeg/eiseg/resource/AutoSave.png new file mode 100644 index 0000000000..03d4e14618 Binary files /dev/null and b/EISeg/eiseg/resource/AutoSave.png differ diff --git a/EISeg/eiseg/resource/ChangeLabelPath.png b/EISeg/eiseg/resource/ChangeLabelPath.png new file mode 100644 index 0000000000..7a1c988005 Binary files /dev/null and b/EISeg/eiseg/resource/ChangeLabelPath.png differ diff --git a/EISeg/eiseg/resource/Clear.png b/EISeg/eiseg/resource/Clear.png new file mode 100644 index 0000000000..2df950a47e 
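With `models.py` above, picking a network and loading weights is two calls: instantiate one of the registered classes (which builds the paddle model in `__init__`) and hand a local `.pdparams` path to `load_param`, which checks that parameter names and counts match before `set_dict`. A hedged sketch, assuming the `eiseg` package directory is importable and that one of the weight files linked in the README has been downloaded to the working directory under the name shown.

```python
from models import HRNet18s_OCR48, MODELS   # MODELS is the registry from eiseg/util

# The registry is what the UI iterates over to fill its network combo box.
print([m.__name__ for m in MODELS])          # e.g. ['HRNet18s_OCR48', 'HRNet18_OCR64']

model = HRNet18s_OCR48()                     # builds the paddle HRNetModel
# Hypothetical local path; any mismatch in key names raises with a hint to re-check pairing.
ok = model.load_param("hrnet18s_ocr48_cocolvis.pdparams")
print(ok, model.paramSet)                    # True True once the weights are accepted
```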
Binary files /dev/null and b/EISeg/eiseg/resource/Clear.png differ diff --git a/EISeg/eiseg/resource/ClearLabel.png b/EISeg/eiseg/resource/ClearLabel.png new file mode 100644 index 0000000000..d13a42946a Binary files /dev/null and b/EISeg/eiseg/resource/ClearLabel.png differ diff --git a/EISeg/eiseg/resource/ClearRecent.png b/EISeg/eiseg/resource/ClearRecent.png new file mode 100644 index 0000000000..8972f457a7 Binary files /dev/null and b/EISeg/eiseg/resource/ClearRecent.png differ diff --git a/EISeg/eiseg/resource/Close.png b/EISeg/eiseg/resource/Close.png new file mode 100644 index 0000000000..8d7c2d8cf8 Binary files /dev/null and b/EISeg/eiseg/resource/Close.png differ diff --git a/EISeg/eiseg/resource/Data.png b/EISeg/eiseg/resource/Data.png new file mode 100644 index 0000000000..1da9c6aa2b Binary files /dev/null and b/EISeg/eiseg/resource/Data.png differ diff --git a/EISeg/eiseg/resource/End.png b/EISeg/eiseg/resource/End.png new file mode 100644 index 0000000000..f32ed3e512 Binary files /dev/null and b/EISeg/eiseg/resource/End.png differ diff --git a/EISeg/eiseg/resource/English.png b/EISeg/eiseg/resource/English.png new file mode 100644 index 0000000000..4de7daa93e Binary files /dev/null and b/EISeg/eiseg/resource/English.png differ diff --git a/EISeg/eiseg/resource/ExportLabel.png b/EISeg/eiseg/resource/ExportLabel.png new file mode 100644 index 0000000000..efa1aa6164 Binary files /dev/null and b/EISeg/eiseg/resource/ExportLabel.png differ diff --git a/EISeg/eiseg/resource/File.png b/EISeg/eiseg/resource/File.png new file mode 100644 index 0000000000..115ad0a168 Binary files /dev/null and b/EISeg/eiseg/resource/File.png differ diff --git a/EISeg/eiseg/resource/ImportLabel.png b/EISeg/eiseg/resource/ImportLabel.png new file mode 100644 index 0000000000..98c9b83388 Binary files /dev/null and b/EISeg/eiseg/resource/ImportLabel.png differ diff --git a/EISeg/eiseg/resource/Label.png b/EISeg/eiseg/resource/Label.png new file mode 100644 index 0000000000..de42654fee Binary files /dev/null and b/EISeg/eiseg/resource/Label.png differ diff --git a/EISeg/eiseg/resource/Language.png b/EISeg/eiseg/resource/Language.png new file mode 100644 index 0000000000..1de4643c45 Binary files /dev/null and b/EISeg/eiseg/resource/Language.png differ diff --git a/EISeg/eiseg/resource/Model.png b/EISeg/eiseg/resource/Model.png new file mode 100644 index 0000000000..b080598b56 Binary files /dev/null and b/EISeg/eiseg/resource/Model.png differ diff --git a/EISeg/eiseg/resource/Net.png b/EISeg/eiseg/resource/Net.png new file mode 100644 index 0000000000..bff8b52c90 Binary files /dev/null and b/EISeg/eiseg/resource/Net.png differ diff --git a/EISeg/eiseg/resource/Next.png b/EISeg/eiseg/resource/Next.png new file mode 100644 index 0000000000..584b06a71e Binary files /dev/null and b/EISeg/eiseg/resource/Next.png differ diff --git a/EISeg/eiseg/resource/Ok.png b/EISeg/eiseg/resource/Ok.png new file mode 100644 index 0000000000..4bde7179bc Binary files /dev/null and b/EISeg/eiseg/resource/Ok.png differ diff --git a/EISeg/eiseg/resource/OpenFolder.png b/EISeg/eiseg/resource/OpenFolder.png new file mode 100644 index 0000000000..e43bc84c7b Binary files /dev/null and b/EISeg/eiseg/resource/OpenFolder.png differ diff --git a/EISeg/eiseg/resource/OpenImage.png b/EISeg/eiseg/resource/OpenImage.png new file mode 100644 index 0000000000..34623c6989 Binary files /dev/null and b/EISeg/eiseg/resource/OpenImage.png differ diff --git a/EISeg/eiseg/resource/OtherSave.png b/EISeg/eiseg/resource/OtherSave.png new file mode 100644 
index 0000000000..e56889a0ca Binary files /dev/null and b/EISeg/eiseg/resource/OtherSave.png differ diff --git a/contrib/EISeg/eiseg/resource/Paddle.png b/EISeg/eiseg/resource/Paddle.png similarity index 100% rename from contrib/EISeg/eiseg/resource/Paddle.png rename to EISeg/eiseg/resource/Paddle.png diff --git a/EISeg/eiseg/resource/Prev.png b/EISeg/eiseg/resource/Prev.png new file mode 100644 index 0000000000..03b3daecaf Binary files /dev/null and b/EISeg/eiseg/resource/Prev.png differ diff --git a/EISeg/eiseg/resource/Redo.png b/EISeg/eiseg/resource/Redo.png new file mode 100644 index 0000000000..15c0ad0c95 Binary files /dev/null and b/EISeg/eiseg/resource/Redo.png differ diff --git a/EISeg/eiseg/resource/RemovePolygon.png b/EISeg/eiseg/resource/RemovePolygon.png new file mode 100644 index 0000000000..631ae492b0 Binary files /dev/null and b/EISeg/eiseg/resource/RemovePolygon.png differ diff --git a/EISeg/eiseg/resource/Same.png b/EISeg/eiseg/resource/Same.png new file mode 100644 index 0000000000..2e603af891 Binary files /dev/null and b/EISeg/eiseg/resource/Same.png differ diff --git a/EISeg/eiseg/resource/Save.png b/EISeg/eiseg/resource/Save.png new file mode 100644 index 0000000000..cbe9dfba48 Binary files /dev/null and b/EISeg/eiseg/resource/Save.png differ diff --git a/EISeg/eiseg/resource/SaveCOCO.png b/EISeg/eiseg/resource/SaveCOCO.png new file mode 100644 index 0000000000..ca15b55127 Binary files /dev/null and b/EISeg/eiseg/resource/SaveCOCO.png differ diff --git a/EISeg/eiseg/resource/SaveGrayScale.png b/EISeg/eiseg/resource/SaveGrayScale.png new file mode 100644 index 0000000000..7fff4e1099 Binary files /dev/null and b/EISeg/eiseg/resource/SaveGrayScale.png differ diff --git a/EISeg/eiseg/resource/SaveJson.png b/EISeg/eiseg/resource/SaveJson.png new file mode 100644 index 0000000000..dce7c28921 Binary files /dev/null and b/EISeg/eiseg/resource/SaveJson.png differ diff --git a/EISeg/eiseg/resource/SaveMatting.png b/EISeg/eiseg/resource/SaveMatting.png new file mode 100644 index 0000000000..2952e8432f Binary files /dev/null and b/EISeg/eiseg/resource/SaveMatting.png differ diff --git a/EISeg/eiseg/resource/SaveMaxPolygon.png b/EISeg/eiseg/resource/SaveMaxPolygon.png new file mode 100644 index 0000000000..932654563d Binary files /dev/null and b/EISeg/eiseg/resource/SaveMaxPolygon.png differ diff --git a/EISeg/eiseg/resource/SavePseudoColor.png b/EISeg/eiseg/resource/SavePseudoColor.png new file mode 100644 index 0000000000..444163f61a Binary files /dev/null and b/EISeg/eiseg/resource/SavePseudoColor.png differ diff --git a/EISeg/eiseg/resource/Setting.png b/EISeg/eiseg/resource/Setting.png new file mode 100644 index 0000000000..54f85b4390 Binary files /dev/null and b/EISeg/eiseg/resource/Setting.png differ diff --git a/EISeg/eiseg/resource/Shortcut.png b/EISeg/eiseg/resource/Shortcut.png new file mode 100644 index 0000000000..10d90b1f1c Binary files /dev/null and b/EISeg/eiseg/resource/Shortcut.png differ diff --git a/EISeg/eiseg/resource/Undo.png b/EISeg/eiseg/resource/Undo.png new file mode 100644 index 0000000000..7b9c1313d0 Binary files /dev/null and b/EISeg/eiseg/resource/Undo.png differ diff --git "a/EISeg/eiseg/resource/\344\270\255\346\226\207.png" "b/EISeg/eiseg/resource/\344\270\255\346\226\207.png" new file mode 100644 index 0000000000..a2e263c811 Binary files /dev/null and "b/EISeg/eiseg/resource/\344\270\255\346\226\207.png" differ diff --git a/EISeg/eiseg/run.py b/EISeg/eiseg/run.py new file mode 100644 index 0000000000..95fd9b6957 --- /dev/null +++ 
b/EISeg/eiseg/run.py @@ -0,0 +1,26 @@ +import sys +import os.path as osp + +from qtpy.QtWidgets import QApplication # 导入PyQt相关模块 +from qtpy import QtCore + +from eiseg import pjpath +from app import APP_EISeg # 导入带槽的界面 + + +def main(): + app = QApplication(sys.argv) + lang = QtCore.QSettings( + osp.join(pjpath, "config/setting.ini"), QtCore.QSettings.IniFormat + ).value("language") + if lang != "中文": + trans = QtCore.QTranslator(app) + trans.load(osp.join(pjpath, f"util/translate/{lang}")) + app.installTranslator(trans) + + window = APP_EISeg() # 创建对象 + window.showMaximized() # 全屏显示窗口 + # 加载近期模型 + QApplication.processEvents() + window.loadRecentModelParam() + sys.exit(app.exec_()) diff --git a/EISeg/eiseg/ui.py b/EISeg/eiseg/ui.py new file mode 100644 index 0000000000..6d54275c77 --- /dev/null +++ b/EISeg/eiseg/ui.py @@ -0,0 +1,224 @@ +from eiseg.widget.create import creat_dock, create_button, create_slider, create_text +import os.path as osp +from functools import partial + +from qtpy import QtCore, QtGui, QtWidgets +from qtpy.QtCore import Qt + +from eiseg import pjpath, __APPNAME__ +from models import ModelsNick +from util import MODELS +from widget import AnnotationScene, AnnotationView +from widget.create import * +from widget.table import TableWidget + + +class Ui_EISeg(object): + def __init__(self): + super(Ui_EISeg, self).__init__() + self.tr = partial(QtCore.QCoreApplication.translate, "APP_EISeg") + + def setupUi(self, MainWindow): + ## -- 主窗体设置 -- + MainWindow.setObjectName("MainWindow") + MainWindow.setMinimumSize(QtCore.QSize(1366, 768)) + MainWindow.setWindowTitle(__APPNAME__) + CentralWidget = QtWidgets.QWidget(MainWindow) + CentralWidget.setObjectName("CentralWidget") + MainWindow.setCentralWidget(CentralWidget) + ## ----- + ## -- 工具栏 -- + toolBar = QtWidgets.QToolBar(self) + sizePolicy = QtWidgets.QSizePolicy( + QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum + ) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + sizePolicy.setHeightForWidth(toolBar.sizePolicy().hasHeightForWidth()) + toolBar.setSizePolicy(sizePolicy) + toolBar.setMinimumSize(QtCore.QSize(0, 33)) + toolBar.setMovable(True) + toolBar.setAllowedAreas(QtCore.Qt.BottomToolBarArea | QtCore.Qt.TopToolBarArea) + toolBar.setObjectName("toolBar") + self.toolBar = toolBar + MainWindow.addToolBar(QtCore.Qt.TopToolBarArea, self.toolBar) + ## ----- + ## -- 状态栏 -- + self.statusbar = QtWidgets.QStatusBar(MainWindow) + self.statusbar.setObjectName("statusbar") + self.statusbar.setStyleSheet("QStatusBar::item {border: none;}") + MainWindow.setStatusBar(self.statusbar) + self.statusbar.addPermanentWidget( + self.show_logo(osp.join(pjpath, "resource/Paddle.png")) + ) + ## ----- + ## -- 图形区域 -- + ImageRegion = QtWidgets.QHBoxLayout(CentralWidget) + ImageRegion.setObjectName("ImageRegion") + # 滑动区域 + self.scrollArea = QtWidgets.QScrollArea(CentralWidget) + self.scrollArea.setWidgetResizable(True) + self.scrollArea.setObjectName("scrollArea") + ImageRegion.addWidget(self.scrollArea) + # 图形显示 + self.scene = AnnotationScene() + self.scene.addPixmap(QtGui.QPixmap()) + self.canvas = AnnotationView(self.scene, self) + sizePolicy = QtWidgets.QSizePolicy( + QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding + ) + self.canvas.setSizePolicy(sizePolicy) + self.canvas.setAlignment(QtCore.Qt.AlignCenter) + self.canvas.setAutoFillBackground(False) + self.canvas.setStyleSheet("background-color: White") + self.canvas.setObjectName("canvas") + self.scrollArea.setWidget(self.canvas) + ## ----- 
+ ## -- 工作区 -- + p_create_dock = partial(self.creat_dock, MainWindow) + p_create_button = partial(self.create_button, CentralWidget) + # 模型加载 + widget = QtWidgets.QWidget() + horizontalLayout = QtWidgets.QHBoxLayout(widget) + ModelRegion = QtWidgets.QVBoxLayout() + ModelRegion.setObjectName("ModelRegion") + # labShowSet = self.create_text(CentralWidget, "labShowSet", "模型选择") + # ModelRegion.addWidget(labShowSet) + combo = QtWidgets.QComboBox(self) + combo.addItems([self.tr(ModelsNick[m.__name__][0]) for m in MODELS]) + self.comboModelSelect = combo + ModelRegion.addWidget(self.comboModelSelect) + # 网络参数 + self.btnParamsSelect = p_create_button( + "btnParamsLoad", + self.tr("加载网络参数"), + osp.join(pjpath, "resource/Model.png"), + "Ctrl+D", + ) + ModelRegion.addWidget(self.btnParamsSelect) # 模型选择 + horizontalLayout.addLayout(ModelRegion) + self.ModelDock = p_create_dock("ModelDock", self.tr("模型选择"), widget) + MainWindow.addDockWidget(QtCore.Qt.DockWidgetArea(2), self.ModelDock) + # 数据列表 + # TODO: 数据列表加一个搜索功能 + widget = QtWidgets.QWidget() + horizontalLayout = QtWidgets.QHBoxLayout(widget) + ListRegion = QtWidgets.QVBoxLayout() + ListRegion.setObjectName("ListRegion") + # labFiles = self.create_text(CentralWidget, "labFiles", "数据列表") + # ListRegion.addWidget(labFiles) + self.listFiles = QtWidgets.QListWidget(CentralWidget) + self.listFiles.setObjectName("ListFiles") + ListRegion.addWidget(self.listFiles) + # 保存 + self.btnSave = p_create_button( + "btnSave", + self.tr("保存"), + osp.join(pjpath, "resource/Save.png"), + "Ctrl+S", + ) + ListRegion.addWidget(self.btnSave) + horizontalLayout.addLayout(ListRegion) + self.DataDock = p_create_dock("DataDock", self.tr("数据列表"), widget) + MainWindow.addDockWidget(QtCore.Qt.DockWidgetArea(2), self.DataDock) + # 标签列表 + widget = QtWidgets.QWidget() + horizontalLayout = QtWidgets.QHBoxLayout(widget) + LabelRegion = QtWidgets.QVBoxLayout() + LabelRegion.setObjectName("LabelRegion") + self.labelListTable = TableWidget(CentralWidget) # QtWidgets.QTableWidget(CentralWidget) + self.labelListTable.horizontalHeader().hide() + # 铺满 + self.labelListTable.horizontalHeader().setSectionResizeMode( + QtWidgets.QHeaderView.Stretch + ) + self.labelListTable.verticalHeader().hide() + self.labelListTable.setColumnWidth(0, 10) + # self.labelListTable.setMinimumWidth() + self.labelListTable.setObjectName("labelListTable") + self.labelListTable.clearContents() + self.labelListTable.setRowCount(0) + self.labelListTable.setColumnCount(4) + + LabelRegion.addWidget(self.labelListTable) + self.btnAddClass = p_create_button( + "btnAddClass", self.tr("添加标签"), osp.join(pjpath, "resource/Label.png") + ) + LabelRegion.addWidget(self.btnAddClass) + horizontalLayout.addLayout(LabelRegion) + self.LabelDock = p_create_dock("LabelDock", self.tr("标签列表"), widget) + MainWindow.addDockWidget(QtCore.Qt.DockWidgetArea(2), self.LabelDock) + ## 滑块设置 + # 分割阈值 + p_create_slider = partial(self.create_slider, CentralWidget) + widget = QtWidgets.QWidget() + horizontalLayout = QtWidgets.QHBoxLayout(widget) + ShowSetRegion = QtWidgets.QVBoxLayout() + ShowSetRegion.setObjectName("ShowSetRegion") + self.sldThresh, SegShowRegion = p_create_slider( + "sldThresh", "labThresh", self.tr("分割阈值:") + ) + ShowSetRegion.addLayout(SegShowRegion) + ShowSetRegion.addWidget(self.sldThresh) + # 透明度 + self.sldOpacity, MaskShowRegion = p_create_slider( + "sldOpacity", "labOpacity", self.tr("标签透明度:"), 75 + ) + ShowSetRegion.addLayout(MaskShowRegion) + ShowSetRegion.addWidget(self.sldOpacity) + # 点大小 + self.sldClickRadius, 
PointShowRegion = p_create_slider( + "sldClickRadius", "labClickRadius", self.tr("点击可视化半径:"), 3, 10, 1 + ) + ShowSetRegion.addLayout(PointShowRegion) + ShowSetRegion.addWidget(self.sldClickRadius) + horizontalLayout.addLayout(ShowSetRegion) + self.ShowSetDock = p_create_dock("ShowSetDock", self.tr("分割设置"), widget) + MainWindow.addDockWidget(QtCore.Qt.DockWidgetArea(2), self.ShowSetDock) + QtCore.QMetaObject.connectSlotsByName(MainWindow) + + ## 创建文本 + def create_text(self, parent, text_name=None, text_text=None): + return create_text(parent, text_name, text_text) + + ## 创建按钮 + def create_button(self, parent, btn_name, btn_text, ico_path=None, curt=None): + return create_button(parent, btn_name, btn_text, ico_path, curt) + + ## 创建dock + def creat_dock(self, parent, name, text, layout): + return creat_dock(parent, name, text, layout) + + ## 显示Logo + def show_logo(self, logo_path): + labLogo = QtWidgets.QLabel() + sizePolicy = QtWidgets.QSizePolicy( + QtWidgets.QSizePolicy.Maximum, QtWidgets.QSizePolicy.Maximum + ) + labLogo.setSizePolicy(sizePolicy) + labLogo.setMaximumSize(QtCore.QSize(100, 33)) + labLogo.setPixmap(QtGui.QPixmap(logo_path)) + labLogo.setScaledContents(True) + labLogo.setObjectName("labLogo") + return labLogo + + ## 创建滑块区域 + def create_slider( + self, + parent, + sld_name, + text_name, + text, + default_value=50, + max_value=100, + text_rate=0.01, + ): + return create_slider( + parent, + sld_name, + text_name, + text, + default_value, + max_value, + text_rate, + ) diff --git a/EISeg/eiseg/util/__init__.py b/EISeg/eiseg/util/__init__.py new file mode 100644 index 0000000000..803d0f3172 --- /dev/null +++ b/EISeg/eiseg/util/__init__.py @@ -0,0 +1,8 @@ +from .qt import newAction, addActions, struct, newIcon +from .config import parse_configs, save_configs +from .colormap import colorMap +from .polygon import get_polygon, Instructions +from .manager import MODELS +from .language import TransUI +from .coco.coco import COCO +from .label import LabelList diff --git a/EISeg/eiseg/util/coco.py.bk b/EISeg/eiseg/util/coco.py.bk new file mode 100644 index 0000000000..93d0bfda8a --- /dev/null +++ b/EISeg/eiseg/util/coco.py.bk @@ -0,0 +1,144 @@ +cocoDict = { + "info": info, + "images": [image], + "annotations": [annotation], + "categories": [ + { + "id": int, + "name": str, + "supercategory": str, + } + ], + "licenses": [license], +} +license = { + "id": int, + "name": str, + "url": str, +} +image = { + "id": int, + "width": int, + "height": int, + "file_name": str, + "license": int, + "flickr_url": str, + "coco_url": str, + "date_captured": datetime, +} +annotation = { + "id": int, + "image_id": int, + "category_id": int, + "segmentation": [polygon], + "area": float, + "bbox": [x, y, width, height], +} +info = { + "year": int, + "version": str, + "description": str, + "contributor": str, + "url": str, + "date_created": datetime, +} +import datetime + + +class CoCoAnn: + def __init__(self, cocoFile=None): + self.dict = { + "info": {}, + "images": [], + "annotations": [], + "categories": [], + "licenses": [], + } + self.annId = 0 + + def setInfo( + self, + year: int = "", + version="", + description="", + contributor="", + url="", + date_created="", + ): + # if not year: + # now = datetime.now() + # year = now.strftime("%Y") + # # TODO: datetime + # if not date_created: + # pass + self.dict["info"] = { + "year": year, + "version": version, + "description": description, + "contributor": contributor, + "url": url, + "date_created": date_created, + } + + def setCategories(self, categories): + 
self.dict["categories"] = categories + + def addCategory(self, id, name, supercategory=""): + cat = { + "id": int, + "name": str, + "supercategory": str, + } + self.dict["categories"].append(cat) + + def setLicenses(self, licenses): + self.licenses = licenses + + def addLicense(self, id, name, url): + license = { + "id": int, + "name": str, + "url": str, + } + self.dict["licenses"].append(license) + + def addImage( + self, + id, + width, + height, + file_name, + license="", + flickr_url="", + coco_url="", + date_captured="", + ): + image = { + "id": id, + "width": width, + "height": height, + "file_name": file_name, + "license": license, + "flickr_url": flickr_url, + "coco_url": coco_url, + "date_captured": date_captured, + } + self.dict["images"].append(image) + + def addAnnotation( + self, + image_id, + category_id, + segmentation, + bbox, + area, + id, + ): + { + "id": int, + "image_id": int, + "category_id": int, + "segmentation": [polygon], + "area": float, + "bbox": [x, y, width, height], + } diff --git a/EISeg/eiseg/util/coco/__init__.py b/EISeg/eiseg/util/coco/__init__.py new file mode 100644 index 0000000000..3f7d85bba8 --- /dev/null +++ b/EISeg/eiseg/util/coco/__init__.py @@ -0,0 +1 @@ +__author__ = 'tylin' diff --git a/EISeg/eiseg/util/coco/_mask.pyx b/EISeg/eiseg/util/coco/_mask.pyx new file mode 100644 index 0000000000..d065837f99 --- /dev/null +++ b/EISeg/eiseg/util/coco/_mask.pyx @@ -0,0 +1,308 @@ +# distutils: language = c +# distutils: sources = ../common/maskApi.c + +#************************************************************************** +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +# Licensed under the Simplified BSD License [see coco/license.txt] +#************************************************************************** + +__author__ = 'tsungyi' + +import sys +PYTHON_VERSION = sys.version_info[0] + +# import both Python-level and C-level symbols of Numpy +# the API uses Numpy to interface C and Python +import numpy as np +cimport numpy as np +from libc.stdlib cimport malloc, free + +# intialized Numpy. must do. 
+np.import_array() + +# import numpy C function +# we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management +cdef extern from "numpy/arrayobject.h": + void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) + +# Declare the prototype of the C functions in MaskApi.h +cdef extern from "maskApi.h": + ctypedef unsigned int uint + ctypedef unsigned long siz + ctypedef unsigned char byte + ctypedef double* BB + ctypedef struct RLE: + siz h, + siz w, + siz m, + uint* cnts, + void rlesInit( RLE **R, siz n ) + void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) + void rleDecode( const RLE *R, byte *mask, siz n ) + void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) + void rleArea( const RLE *R, siz n, uint *a ) + void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) + void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) + void rleToBbox( const RLE *R, BB bb, siz n ) + void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) + void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) + char* rleToString( const RLE *R ) + void rleFrString( RLE *R, char *s, siz h, siz w ) + +# python class to wrap RLE array in C +# the class handles the memory allocation and deallocation +cdef class RLEs: + cdef RLE *_R + cdef siz _n + + def __cinit__(self, siz n =0): + rlesInit(&self._R, n) + self._n = n + + # free the RLE array here + def __dealloc__(self): + if self._R is not NULL: + for i in range(self._n): + free(self._R[i].cnts) + free(self._R) + def __getattr__(self, key): + if key == 'n': + return self._n + raise AttributeError(key) + +# python class to wrap Mask array in C +# the class handles the memory allocation and deallocation +cdef class Masks: + cdef byte *_mask + cdef siz _h + cdef siz _w + cdef siz _n + + def __cinit__(self, h, w, n): + self._mask = malloc(h*w*n* sizeof(byte)) + self._h = h + self._w = w + self._n = n + # def __dealloc__(self): + # the memory management of _mask has been passed to np.ndarray + # it doesn't need to be freed here + + # called when passing into np.array() and return an np.ndarray in column-major order + def __array__(self): + cdef np.npy_intp shape[1] + shape[0] = self._h*self._w*self._n + # Create a 1D array, and reshape it to fortran/Matlab column-major array + ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') + # The _mask allocated by Masks is now handled by ndarray + PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) + return ndarray + +# internal conversion from Python RLEs object to compressed RLE format +def _toString(RLEs Rs): + cdef siz n = Rs.n + cdef bytes py_string + cdef char* c_string + objs = [] + for i in range(n): + c_string = rleToString( &Rs._R[i] ) + py_string = c_string + objs.append({ + 'size': [Rs._R[i].h, Rs._R[i].w], + 'counts': py_string + }) + free(c_string) + return objs + +# internal conversion from compressed RLE format to Python RLEs object +def _frString(rleObjs): + cdef siz n = len(rleObjs) + Rs = RLEs(n) + cdef bytes py_string + cdef char* c_string + for i, obj in enumerate(rleObjs): + if PYTHON_VERSION == 2: + py_string = str(obj['counts']).encode('utf8') + elif PYTHON_VERSION == 3: + py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] + else: + raise Exception('Python version must be 2 or 3') + c_string = py_string + rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) + return Rs + +# encode mask to RLEs objects +# list of RLE string can be 
generated by RLEs member function +def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): + h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] + cdef RLEs Rs = RLEs(n) + rleEncode(Rs._R,mask.data,h,w,n) + objs = _toString(Rs) + return objs + +# decode mask from compressed list of RLE string or RLEs object +def decode(rleObjs): + cdef RLEs Rs = _frString(rleObjs) + h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n + masks = Masks(h, w, n) + rleDecode(Rs._R, masks._mask, n); + return np.array(masks) + +def merge(rleObjs, intersect=0): + cdef RLEs Rs = _frString(rleObjs) + cdef RLEs R = RLEs(1) + rleMerge(Rs._R, R._R, Rs._n, intersect) + obj = _toString(R)[0] + return obj + +def area(rleObjs): + cdef RLEs Rs = _frString(rleObjs) + cdef uint* _a = malloc(Rs._n* sizeof(uint)) + rleArea(Rs._R, Rs._n, _a) + cdef np.npy_intp shape[1] + shape[0] = Rs._n + a = np.array((Rs._n, ), dtype=np.uint8) + a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) + PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) + return a + +# iou computation. support function overload (RLEs-RLEs and bbox-bbox). +def iou( dt, gt, pyiscrowd ): + def _preproc(objs): + if len(objs) == 0: + return objs + if type(objs) == np.ndarray: + if len(objs.shape) == 1: + objs = objs.reshape((objs[0], 1)) + # check if it's Nx4 bbox + if not len(objs.shape) == 2 or not objs.shape[1] == 4: + raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') + objs = objs.astype(np.double) + elif type(objs) == list: + # check if list is in box format and convert it to np.ndarray + isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) + isrle = np.all(np.array([type(obj) == dict for obj in objs])) + if isbox: + objs = np.array(objs, dtype=np.double) + if len(objs.shape) == 1: + objs = objs.reshape((1,objs.shape[0])) + elif isrle: + objs = _frString(objs) + else: + raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') + else: + raise Exception('unrecognized type. 
The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') + return objs + def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): + rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) + def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): + bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) + def _len(obj): + cdef siz N = 0 + if type(obj) == RLEs: + N = obj.n + elif len(obj)==0: + pass + elif type(obj) == np.ndarray: + N = obj.shape[0] + return N + # convert iscrowd to numpy array + cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) + # simple type checking + cdef siz m, n + dt = _preproc(dt) + gt = _preproc(gt) + m = _len(dt) + n = _len(gt) + if m == 0 or n == 0: + return [] + if not type(dt) == type(gt): + raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') + + # define local variables + cdef double* _iou = 0 + cdef np.npy_intp shape[1] + # check type and assign iou function + if type(dt) == RLEs: + _iouFun = _rleIou + elif type(dt) == np.ndarray: + _iouFun = _bbIou + else: + raise Exception('input data type not allowed.') + _iou = malloc(m*n* sizeof(double)) + iou = np.zeros((m*n, ), dtype=np.double) + shape[0] = m*n + iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) + PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) + _iouFun(dt, gt, iscrowd, m, n, iou) + return iou.reshape((m,n), order='F') + +def toBbox( rleObjs ): + cdef RLEs Rs = _frString(rleObjs) + cdef siz n = Rs.n + cdef BB _bb = malloc(4*n* sizeof(double)) + rleToBbox( Rs._R, _bb, n ) + cdef np.npy_intp shape[1] + shape[0] = 4*n + bb = np.array((1,4*n), dtype=np.double) + bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) + PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) + return bb + +def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): + cdef siz n = bb.shape[0] + Rs = RLEs(n) + rleFrBbox( Rs._R, bb.data, h, w, n ) + objs = _toString(Rs) + return objs + +def frPoly( poly, siz h, siz w ): + cdef np.ndarray[np.double_t, ndim=1] np_poly + n = len(poly) + Rs = RLEs(n) + for i, p in enumerate(poly): + np_poly = np.array(p, dtype=np.double, order='F') + rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) + objs = _toString(Rs) + return objs + +def frUncompressedRLE(ucRles, siz h, siz w): + cdef np.ndarray[np.uint32_t, ndim=1] cnts + cdef RLE R + cdef uint *data + n = len(ucRles) + objs = [] + for i in range(n): + Rs = RLEs(1) + cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) + # time for malloc can be saved here but it's fine + data = malloc(len(cnts)* sizeof(uint)) + for j in range(len(cnts)): + data[j] = cnts[j] + R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) + Rs._R[0] = R + objs.append(_toString(Rs)[0]) + return objs + +def frPyObjects(pyobj, h, w): + # encode rle from a list of python objects + if type(pyobj) == np.ndarray: + objs = frBbox(pyobj, h, w) + elif type(pyobj) == list and len(pyobj[0]) == 4: + objs = frBbox(pyobj, h, w) + elif type(pyobj) == list and len(pyobj[0]) > 4: + objs = frPoly(pyobj, h, w) + elif type(pyobj) == list and type(pyobj[0]) == dict \ + and 'counts' in pyobj[0] and 'size' in pyobj[0]: + objs = frUncompressedRLE(pyobj, h, w) + # encode rle from single python object + elif type(pyobj) == list and len(pyobj) == 4: + objs = 
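The Cython module above is the standard pycocotools mask backend: `frPyObjects` (whose dispatch continues just below) turns polygons, boxes, or uncompressed RLE into compressed RLE, and `decode`, `area`, and `toBbox` operate on that RLE. A usage sketch, assuming the extension has been built and is exposed through the usual pycocotools `mask` wrapper rather than imported from `_mask` directly.

```python
from pycocotools import mask as maskUtils   # same API as the _mask.pyx above

h, w = 100, 100
# One square polygon, flattened as [x1, y1, x2, y2, ...] like a COCO segmentation.
poly = [[10.0, 10.0, 60.0, 10.0, 60.0, 60.0, 10.0, 60.0]]

rles = maskUtils.frPyObjects(poly, h, w)     # polygon -> compressed RLE (one per part)
rle = maskUtils.merge(rles)                  # merge all parts into a single RLE
binary = maskUtils.decode(rle)               # HxW uint8 mask

print(binary.sum())                          # roughly 2500 foreground pixels
print(maskUtils.area(rle))                   # same count, computed on the RLE
print(maskUtils.toBbox(rle))                 # approximately [10, 10, 50, 50] as x, y, w, h
```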
frBbox([pyobj], h, w)[0] + elif type(pyobj) == list and len(pyobj) > 4: + objs = frPoly([pyobj], h, w)[0] + elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: + objs = frUncompressedRLE([pyobj], h, w)[0] + else: + raise Exception('input type is not supported.') + return objs diff --git a/EISeg/eiseg/util/coco/coco.py b/EISeg/eiseg/util/coco/coco.py new file mode 100644 index 0000000000..d99088835a --- /dev/null +++ b/EISeg/eiseg/util/coco/coco.py @@ -0,0 +1,652 @@ +import json +import time +import matplotlib.pyplot as plt +from matplotlib.collections import PatchCollection +from matplotlib.patches import Polygon +import numpy as np +import copy +import itertools + +# from . import mask as maskUtils +import os +from collections import defaultdict +import sys +from datetime import datetime + +PYTHON_VERSION = sys.version_info[0] +if PYTHON_VERSION == 2: + from urllib import urlretrieve +elif PYTHON_VERSION == 3: + from urllib.request import urlretrieve + + +def _isArrayLike(obj): + return hasattr(obj, "__iter__") and hasattr(obj, "__len__") + + +class COCO: + def __init__(self, annotation_file=None): + """ + Constructor of Microsoft COCO helper class for reading and visualizing annotations. + :param annotation_file (str): location of annotation file + :param image_folder (str): location to the folder that hosts images. + :return: + """ + # dataset, anns, cats, imgs, imgToAnns, catToImgs, imgNameToId, maxAnnId, maxImgId + self.dataset = { + "categories": [], + "images": [], + "annotations": [], + "info": "info", + "licenses": "licenses", + } # the complete json + self.anns = dict() # anns[annId]={} + self.cats = dict() # cats[catId] = {} + self.imgs = dict() # imgs[imgId] = {} + self.imgToAnns = defaultdict(list) # imgToAnns[imgId] = [ann] + self.catToImgs = defaultdict(list) # catToImgs[catId] = [imgId] + self.imgNameToId = defaultdict(list) # imgNameToId[name] = imgId + self.maxAnnId = 0 + self.maxImgId = 0 + if annotation_file is not None: + print("loading annotations into memory...") + tic = time.time() + dataset = json.load(open(annotation_file, "r")) + assert ( + type(dataset) == dict + ), "annotation file format {} not supported".format(type(dataset)) + print("Done (t={:0.2f}s)".format(time.time() - tic)) + self.dataset = dataset + self.createIndex() + print( + f"load coco with {len(self.dataset['images'])} images and {len(self.dataset['annotations'])} annotations." 
+ ) + + def hasImage(self, imageName): + imgId = self.imgNameToId.get(imageName, None) + return imgId is not None + + def hasCat(self, catIdx): + res = self.cats.get(catIdx) + return res is not None + + def createIndex(self): + # create index + print("creating index...") + anns, cats, imgs = {}, {}, {} + imgNameToId, imgToAnns, catToImgs, imgNameToId = [ + defaultdict(list) for _ in range(4) + ] + if "annotations" in self.dataset: + for ann in self.dataset["annotations"]: + imgToAnns[ann["image_id"]].append(ann) + anns[ann["id"]] = ann + self.maxAnnId = max(self.maxAnnId, ann["id"]) + + if "images" in self.dataset: + for img in self.dataset["images"]: + imgs[img["id"]] = img + imgNameToId[img["file_name"]] = img["id"] + try: + imgId = int(img["id"]) + self.maxImgId = max(self.maxImgId, imgId) + except: + pass + + if "categories" in self.dataset: + for cat in self.dataset["categories"]: + cats[cat["id"]] = cat + + if "annotations" in self.dataset and "categories" in self.dataset: + for ann in self.dataset["annotations"]: + catToImgs[ann["category_id"]].append(ann["image_id"]) + # TODO: read license + print("index created!") + + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgNameToId = imgNameToId + self.imgs = imgs + self.cats = cats + + def setInfo( + self, + year: int = "", + version: str = "", + description: str = "", + contributor: str = "", + url: str = "", + date_created: datetime = "", + ): + self.dataset["info"] = { + "year": year, + "version": version, + "description": description, + "contributor": contributor, + "url": url, + "date_created": date_created, + } + + def addCategory( + self, + id: int, + name: str, + color: list, + supercategory: str = "", + ): + cat = { + "id": id, + "name": name, + "color": color, + "supercategory": supercategory, + } + self.cats[id] = cat + self.dataset["categories"].append(cat) + + def updateCategory( + self, + id: int, + name: str, + color: list, + supercategory: str = "", + ): + cat = { + "id": id, + "name": name, + "color": color, + "supercategory": supercategory, + } + self.cats[id] = cat + for idx in range(len(self.dataset["categories"])): + if self.dataset["categories"][idx]["id"] == id: + self.dataset["categories"][idx] = cat + + def addImage( + self, + file_name: str, + width: int, + height: int, + id: int = None, + license: int = "", + flickr_url: str = "", + coco_url: str = "", + date_captured: datetime = "", + ): + if self.hasImage(file_name): + print(f"{file_name}图片已存在") + return + if not id: + self.maxImgId += 1 + id = self.maxImgId + image = { + "id": id, + "width": width, + "height": height, + "file_name": file_name, + "license": license, + "flickr_url": flickr_url, + "coco_url": coco_url, + "date_captured": date_captured, + } + self.dataset["images"].append(image) + self.imgs[id] = image + self.imgNameToId[file_name] = id + return id + + def addAnnotation( + self, + image_id: int, + category_id: int, + segmentation: list, + bbox: list = None, + area: float = None, + id: int = None, + ): + if id is not None and self.anns.get(id, None) is not None: + print("标签已经存在") + return + if not id: + self.maxAnnId += 1 + id = self.maxAnnId + if not bbox: + x, y, width, height = 0, 0, 0, 0 + else: + x, y, width, height = bbox[:] + # TODO: cal area + if not area: + area = 0 + + ann = { + "id": id, + "image_id": image_id, + "category_id": category_id, + "segmentation": [segmentation], + "area": area, + "bbox": [x, y, width, height], + } + + self.dataset["annotations"].append(ann) + self.anns[id] = ann + 
self.imgToAnns[image_id].append(ann) + self.catToImgs[category_id].append(image_id) + return id + + def delAnnotation(self, annId, imgId): + if "annotations" in self.dataset: + for idx, ann in enumerate(self.dataset["annotations"]): + if ann["id"] == annId: + del self.dataset["annotations"][idx] + if annId in self.anns.keys(): + del self.anns[annId] + + for idx, ann in enumerate(self.imgToAnns[imgId]): + if ann["id"] == annId: + del self.imgToAnns[imgId][idx] + + def updateAnnotation(self, id, imgId, points, bbox=None): + self.anns[id]["segmentation"] = [points] + + for rec in self.dataset["annotations"]: + if rec["id"] == id: + rec["segmentation"] = [points] + if bbox is not None: + rec["bbox"] = bbox + break + + for rec in self.dataset["annotations"]: + if rec["id"] == id: + # @todo TODO move into debug codes or controls + print( + "record point : ", + rec["segmentation"][0][0], + rec["segmentation"][0][1], + ) + break + + for rec in self.imgToAnns[imgId]: + if rec["id"] == id: + rec["segmentation"] = [points] + break + + def info(self): + """ + Print information about the annotation file. + :return: + """ + for key, value in self.dataset["info"].items(): + print("{}: {}".format(key, value)) + + def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): + """ + Get ann ids that satisfy given filter conditions. default skips that filter + :param imgIds (int array) : get anns for given imgs + catIds (int array) : get anns for given cats + areaRng (float array) : get anns for given area range (e.g. [0 inf]) + iscrowd (boolean) : get anns for given crowd label (False or True) + :return: ids (int array) : integer array of ann ids + """ + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == len(areaRng) == 0: + anns = self.dataset["annotations"] + else: + if not len(imgIds) == 0: + lists = [ + self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns + ] + anns = list(itertools.chain.from_iterable(lists)) + else: + anns = self.dataset["annotations"] + anns = ( + anns + if len(catIds) == 0 + else [ann for ann in anns if ann["category_id"] in catIds] + ) + anns = ( + anns + if len(areaRng) == 0 + else [ + ann + for ann in anns + if ann["area"] > areaRng[0] and ann["area"] < areaRng[1] + ] + ) + if not iscrowd == None: + ids = [ann["id"] for ann in anns if ann["iscrowd"] == iscrowd] + else: + ids = [ann["id"] for ann in anns] + return ids + + def getCatIds(self, catNms=[], supNms=[], catIds=[]): + """ + filtering parameters. default skips that filter. 
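The `COCO` class above extends the original pycocotools reader with writer-style helpers (`addCategory`, `addImage`, `addAnnotation`, `updateAnnotation`), which is what backs EISeg's COCO-format saving. A small sketch of building an annotation file with it; the file names, IDs, and coordinates are illustrative.

```python
import json
from util import COCO                          # re-exported in eiseg/util/__init__.py

coco = COCO()                                  # start from an empty in-memory dataset
coco.addCategory(id=1, name="person", color=[128, 0, 0])
img_id = coco.addImage(file_name="0001.jpg", width=640, height=480)

# Segmentation is one flat [x1, y1, x2, y2, ...] polygon, as in standard COCO files.
coco.addAnnotation(
    image_id=img_id,
    category_id=1,
    segmentation=[10, 10, 200, 10, 200, 300, 10, 300],
    bbox=[10, 10, 190, 290],
)

# The lookup indices stay consistent with the raw dict that gets written to disk.
print(coco.getAnnIds(imgIds=img_id), coco.imgToAnns[img_id][0]["bbox"])
with open("annotations.json", "w") as f:
    json.dump(coco.dataset, f)
```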
+ :param catNms (str array) : get cats for given cat names + :param supNms (str array) : get cats for given supercategory names + :param catIds (int array) : get cats for given cat ids + :return: ids (int array) : integer array of cat ids + """ + catNms = catNms if _isArrayLike(catNms) else [catNms] + supNms = supNms if _isArrayLike(supNms) else [supNms] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(catNms) == len(supNms) == len(catIds) == 0: + cats = self.dataset["categories"] + else: + cats = self.dataset["categories"] + cats = ( + cats + if len(catNms) == 0 + else [cat for cat in cats if cat["name"] in catNms] + ) + cats = ( + cats + if len(supNms) == 0 + else [cat for cat in cats if cat["supercategory"] in supNms] + ) + cats = ( + cats + if len(catIds) == 0 + else [cat for cat in cats if cat["id"] in catIds] + ) + ids = [cat["id"] for cat in cats] + return ids + + def getImgIds(self, imgIds=[], catIds=[]): + """ + Get img ids that satisfy given filter conditions. + :param imgIds (int array) : get imgs for given ids + :param catIds (int array) : get imgs with all given cats + :return: ids (int array) : integer array of img ids + """ + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == 0: + ids = self.imgs.keys() + else: + ids = set(imgIds) + for i, catId in enumerate(catIds): + if i == 0 and len(ids) == 0: + ids = set(self.catToImgs[catId]) + else: + ids &= set(self.catToImgs[catId]) + return list(ids) + + def loadAnns(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying anns + :return: anns (object array) : loaded ann objects + """ + if _isArrayLike(ids): + return [self.anns[id] for id in ids] + elif type(ids) == int: + return [self.anns[ids]] + + def loadCats(self, ids=[]): + """ + Load cats with the specified ids. + :param ids (int array) : integer ids specifying cats + :return: cats (object array) : loaded cat objects + """ + if _isArrayLike(ids): + return [self.cats[id] for id in ids] + elif type(ids) == int: + return [self.cats[ids]] + + def loadImgs(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying img + :return: imgs (object array) : loaded img objects + """ + if _isArrayLike(ids): + return [self.imgs[id] for id in ids] + elif type(ids) == int: + return [self.imgs[ids]] + + # def showAnns(self, anns, draw_bbox=False): + # """ + # Display the specified annotations. 
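Reading an existing annotation file keeps the familiar pycocotools query chain shown above: filter categories by name, then images by category, then annotations per image. A sketch against a hypothetical `annotations.json` such as the one produced in the previous example.

```python
from util import COCO                          # re-exported in eiseg/util/__init__.py

coco = COCO("annotations.json")                # hypothetical existing COCO file
cat_ids = coco.getCatIds(catNms=["person"])    # category ids whose name matches
img_ids = coco.getImgIds(catIds=cat_ids)       # images that contain those categories

for img in coco.loadImgs(img_ids):
    ann_ids = coco.getAnnIds(imgIds=img["id"], catIds=cat_ids)
    for ann in coco.loadAnns(ann_ids):
        # each segmentation entry is a flat polygon list, one per object part
        print(img["file_name"], ann["category_id"], len(ann["segmentation"][0]))
```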
+ # :param anns (array of object): annotations to display + # :return: None + # """ + # if len(anns) == 0: + # return 0 + # if "segmentation" in anns[0] or "keypoints" in anns[0]: + # datasetType = "instances" + # elif "caption" in anns[0]: + # datasetType = "captions" + # else: + # raise Exception("datasetType not supported") + # if datasetType == "instances": + # ax = plt.gca() + # ax.set_autoscale_on(False) + # polygons = [] + # color = [] + # for ann in anns: + # c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0] + # if "segmentation" in ann: + # if type(ann["segmentation"]) == list: + # # polygon + # for seg in ann["segmentation"]: + # poly = np.array(seg).reshape((int(len(seg) / 2), 2)) + # polygons.append(Polygon(poly)) + # color.append(c) + # else: + # # mask + # t = self.imgs[ann["image_id"]] + # if type(ann["segmentation"]["counts"]) == list: + # rle = maskUtils.frPyObjects( + # [ann["segmentation"]], t["height"], t["width"] + # ) + # else: + # rle = [ann["segmentation"]] + # m = maskUtils.decode(rle) + # img = np.ones((m.shape[0], m.shape[1], 3)) + # if ann["iscrowd"] == 1: + # color_mask = np.array([2.0, 166.0, 101.0]) / 255 + # if ann["iscrowd"] == 0: + # color_mask = np.random.random((1, 3)).tolist()[0] + # for i in range(3): + # img[:, :, i] = color_mask[i] + # ax.imshow(np.dstack((img, m * 0.5))) + # if "keypoints" in ann and type(ann["keypoints"]) == list: + # # turn skeleton into zero-based index + # sks = np.array(self.loadCats(ann["category_id"])[0]["skeleton"]) - 1 + # kp = np.array(ann["keypoints"]) + # x = kp[0::3] + # y = kp[1::3] + # v = kp[2::3] + # for sk in sks: + # if np.all(v[sk] > 0): + # plt.plot(x[sk], y[sk], linewidth=3, color=c) + # plt.plot( + # x[v > 0], + # y[v > 0], + # "o", + # markersize=8, + # markerfacecolor=c, + # markeredgecolor="k", + # markeredgewidth=2, + # ) + # plt.plot( + # x[v > 1], + # y[v > 1], + # "o", + # markersize=8, + # markerfacecolor=c, + # markeredgecolor=c, + # markeredgewidth=2, + # ) + # + # if draw_bbox: + # [bbox_x, bbox_y, bbox_w, bbox_h] = ann["bbox"] + # poly = [ + # [bbox_x, bbox_y], + # [bbox_x, bbox_y + bbox_h], + # [bbox_x + bbox_w, bbox_y + bbox_h], + # [bbox_x + bbox_w, bbox_y], + # ] + # np_poly = np.array(poly).reshape((4, 2)) + # polygons.append(Polygon(np_poly)) + # color.append(c) + # + # p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) + # ax.add_collection(p) + # p = PatchCollection( + # polygons, facecolor="none", edgecolors=color, linewidths=2 + # ) + # ax.add_collection(p) + # elif datasetType == "captions": + # for ann in anns: + # print(ann["caption"]) + # + # def loadRes(self, resFile): + # """ + # Load result file and return a result api object. 
+ # :param resFile (str) : file name of result file + # :return: res (obj) : result api object + # """ + # res = COCO() + # res.dataset["images"] = [img for img in self.dataset["images"]] + # + # print("Loading and preparing results...") + # tic = time.time() + # if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode): + # anns = json.load(open(resFile)) + # elif type(resFile) == np.ndarray: + # anns = self.loadNumpyAnnotations(resFile) + # else: + # anns = resFile + # assert type(anns) == list, "results in not an array of objects" + # annsImgIds = [ann["image_id"] for ann in anns] + # assert set(annsImgIds) == ( + # set(annsImgIds) & set(self.getImgIds()) + # ), "Results do not correspond to current coco set" + # if "caption" in anns[0]: + # imgIds = set([img["id"] for img in res.dataset["images"]]) & set( + # [ann["image_id"] for ann in anns] + # ) + # res.dataset["images"] = [ + # img for img in res.dataset["images"] if img["id"] in imgIds + # ] + # for id, ann in enumerate(anns): + # ann["id"] = id + 1 + # elif "bbox" in anns[0] and not anns[0]["bbox"] == []: + # res.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + # for id, ann in enumerate(anns): + # bb = ann["bbox"] + # x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] + # if not "segmentation" in ann: + # ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + # ann["area"] = bb[2] * bb[3] + # ann["id"] = id + 1 + # ann["iscrowd"] = 0 + # elif "segmentation" in anns[0]: + # res.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + # for id, ann in enumerate(anns): + # # now only support compressed RLE format as segmentation results + # ann["area"] = maskUtils.area(ann["segmentation"]) + # if not "bbox" in ann: + # ann["bbox"] = maskUtils.toBbox(ann["segmentation"]) + # ann["id"] = id + 1 + # ann["iscrowd"] = 0 + # elif "keypoints" in anns[0]: + # res.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + # for id, ann in enumerate(anns): + # s = ann["keypoints"] + # x = s[0::3] + # y = s[1::3] + # x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y) + # ann["area"] = (x1 - x0) * (y1 - y0) + # ann["id"] = id + 1 + # ann["bbox"] = [x0, y0, x1 - x0, y1 - y0] + # print("DONE (t={:0.2f}s)".format(time.time() - tic)) + # + # res.dataset["annotations"] = anns + # res.createIndex() + # return res + + def download(self, tarDir=None, imgIds=[]): + """ + Download COCO images from mscoco.org server. 
+ :param tarDir (str): COCO results directory name + imgIds (list): images to be downloaded + :return: + """ + if tarDir is None: + print("Please specify target directory") + return -1 + if len(imgIds) == 0: + imgs = self.imgs.values() + else: + imgs = self.loadImgs(imgIds) + N = len(imgs) + if not os.path.exists(tarDir): + os.makedirs(tarDir) + for i, img in enumerate(imgs): + tic = time.time() + fname = os.path.join(tarDir, img["file_name"]) + if not os.path.exists(fname): + urlretrieve(img["coco_url"], fname) + print( + "downloaded {}/{} images (t={:0.1f}s)".format(i, N, time.time() - tic) + ) + + def loadNumpyAnnotations(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param data (numpy.ndarray) + :return: annotations (python nested list) + """ + print("Converting ndarray to lists...") + assert type(data) == np.ndarray + print(data.shape) + assert data.shape[1] == 7 + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print("{}/{}".format(i, N)) + ann += [ + { + "image_id": int(data[i, 0]), + "bbox": [data[i, 1], data[i, 2], data[i, 3], data[i, 4]], + "score": data[i, 5], + "category_id": int(data[i, 6]), + } + ] + return ann + + # def annToRLE(self, ann): + # """ + # Convert annotation which can be polygons, uncompressed RLE to RLE. + # :return: binary mask (numpy 2D array) + # """ + # t = self.imgs[ann["image_id"]] + # h, w = t["height"], t["width"] + # segm = ann["segmentation"] + # if type(segm) == list: + # # polygon -- a single object might consist of multiple parts + # # we merge all parts into one mask rle code + # rles = maskUtils.frPyObjects(segm, h, w) + # rle = maskUtils.merge(rles) + # elif type(segm["counts"]) == list: + # # uncompressed RLE + # rle = maskUtils.frPyObjects(segm, h, w) + # else: + # # rle + # rle = ann["segmentation"] + # return rle + + # def annToMask(self, ann): + # """ + # Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. + # :return: binary mask (numpy 2D array) + # """ + # rle = self.annToRLE(ann) + # m = maskUtils.decode(rle) + # return m diff --git a/EISeg/eiseg/util/coco/cocoeval.py b/EISeg/eiseg/util/coco/cocoeval.py new file mode 100644 index 0000000000..f4e3decbab --- /dev/null +++ b/EISeg/eiseg/util/coco/cocoeval.py @@ -0,0 +1,534 @@ +__author__ = 'tsungyi' + +import numpy as np +import datetime +import time +from collections import defaultdict +from . import mask as maskUtils +import copy + +class COCOeval: + # Interface for evaluating detection on the Microsoft COCO dataset. + # + # The usage for CocoEval is as follows: + # cocoGt=..., cocoDt=... # load dataset and results + # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object + # E.params.recThrs = ...; # set parameters as desired + # E.evaluate(); # run per image evaluation + # E.accumulate(); # accumulate per image results + # E.summarize(); # display summary metrics of results + # For example usage see evalDemo.m and http://mscoco.org/. + # + # The evaluation parameters are as follows (defaults in brackets): + # imgIds - [all] N img ids to use for evaluation + # catIds - [all] K cat ids to use for evaluation + # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation + # recThrs - [0:.01:1] R=101 recall thresholds for evaluation + # areaRng - [...] 
A=4 object area ranges for evaluation + # maxDets - [1 10 100] M=3 thresholds on max detections per image + # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation + # Note: if useCats=0 category labels are ignored as in proposal scoring. + # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. + # + # evaluate(): evaluates detections on every image and every category and + # concats the results into the "evalImgs" with fields: + # dtIds - [1xD] id for each of the D detections (dt) + # gtIds - [1xG] id for each of the G ground truths (gt) + # dtMatches - [TxD] matching gt id at each IoU or 0 + # gtMatches - [TxG] matching dt id at each IoU or 0 + # dtScores - [1xD] confidence of each dt + # gtIgnore - [1xG] ignore flag for each gt + # dtIgnore - [TxD] ignore flag for each dt at each IoU + # + # accumulate(): accumulates the per-image, per-category evaluation + # results in "evalImgs" into the dictionary "eval" with fields: + # params - parameters used for evaluation + # date - date evaluation was performed + # counts - [T,R,K,A,M] parameter dimensions (see above) + # precision - [TxRxKxAxM] precision for every evaluation setting + # recall - [TxKxAxM] max recall for every evaluation setting + # Note: precision and recall==-1 for settings with no gt objects. + # + # See also coco, mask, pycocoDemo, pycocoEvalDemo + # + # Microsoft COCO Toolbox. version 2.0 + # Data, paper, and tutorials available at: http://mscoco.org/ + # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. + # Licensed under the Simplified BSD License [see coco/license.txt] + def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): + ''' + Initialize CocoEval using coco APIs for gt and dt + :param cocoGt: coco object with ground truth annotations + :param cocoDt: coco object with detection results + :return: None + ''' + if not iouType: + print('iouType not specified. 
use default iouType segm') + self.cocoGt = cocoGt # ground truth COCO API + self.cocoDt = cocoDt # detections COCO API + self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements + self.eval = {} # accumulated evaluation results + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + self.params = Params(iouType=iouType) # parameters + self._paramsEval = {} # parameters for evaluation + self.stats = [] # result summarization + self.ious = {} # ious between all gts and dts + if not cocoGt is None: + self.params.imgIds = sorted(cocoGt.getImgIds()) + self.params.catIds = sorted(cocoGt.getCatIds()) + + + def _prepare(self): + ''' + Prepare ._gts and ._dts for evaluation based on params + :return: None + ''' + def _toMask(anns, coco): + # modify ann['segmentation'] by reference + for ann in anns: + rle = coco.annToRLE(ann) + ann['segmentation'] = rle + p = self.params + if p.useCats: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + else: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) + + # convert ground truth to mask if iouType == 'segm' + if p.iouType == 'segm': + _toMask(gts, self.cocoGt) + _toMask(dts, self.cocoDt) + # set ignore flag + for gt in gts: + gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 + gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] + if p.iouType == 'keypoints': + gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + for gt in gts: + self._gts[gt['image_id'], gt['category_id']].append(gt) + for dt in dts: + self._dts[dt['image_id'], dt['category_id']].append(dt) + self.evalImgs = defaultdict(list) # per-image per-category evaluation results + self.eval = {} # accumulated evaluation results + + def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + tic = time.time() + print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if not p.useSegm is None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) + print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params=p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = {(imgId, catId): computeIoU(imgId, catId) \ + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print('DONE (t={:0.2f}s).'.format(toc-tic)) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return [] + inds = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt=dt[0:p.maxDets[-1]] + + if p.iouType == 'segm': + g = [g['segmentation'] for g in gt] + d = [d['segmentation'] for d in dt] + elif p.iouType == 'bbox': + g = [g['bbox'] for g in gt] + d = [d['bbox'] for d in dt] + else: + raise Exception('unknown iouType for iou computation') + + # compute iou between each dt and gt region + iscrowd = [int(o['iscrowd']) for o in gt] + ious = maskUtils.iou(d,g,iscrowd) + return ious + + def computeOks(self, imgId, catId): + p = self.params + # dimention here should be Nxm + gts = self._gts[imgId, catId] + dts = self._dts[imgId, catId] + inds = np.argsort([-d['score'] for d in dts], kind='mergesort') + dts = [dts[i] for i in inds] + if len(dts) > p.maxDets[-1]: + dts = dts[0:p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(gts) == 0 or len(dts) == 0: + return [] + ious = np.zeros((len(dts), len(gts))) + sigmas = p.kpt_oks_sigmas + vars = (sigmas * 2)**2 + k = len(sigmas) + # compute oks between each detection and ground truth object + for j, gt in enumerate(gts): + # create bounds for ignore regions(double the gt bbox) + g = np.array(gt['keypoints']) + xg = g[0::3]; yg = g[1::3]; vg = g[2::3] + k1 = np.count_nonzero(vg > 0) + bb = gt['bbox'] + x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 + y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 + for i, dt in enumerate(dts): + d = np.array(dt['keypoints']) + xd = d[0::3]; yd = d[1::3] + if k1>0: + # measure the per-keypoint distance if keypoints visible + dx = xd - xg + dy = yd - yg + else: + # measure minimum distance to keypoints in (x0,y0) & (x1,y1) + z = np.zeros((k)) + dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) + dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) + e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2 + if k1 > 0: + e=e[vg > 0] + ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] + return ious + + def evaluateImg(self, imgId, catId, aRng, maxDet): + ''' + perform evaluation for single category and image + :return: dict (single image results) + ''' + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId 
in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return None + + for g in gt: + if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]): + g['_ignore'] = 1 + else: + g['_ignore'] = 0 + + # sort dt highest score first, sort gt ignore last + gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') + gt = [gt[i] for i in gtind] + dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in dtind[0:maxDet]] + iscrowd = [int(o['iscrowd']) for o in gt] + # load computed ious + ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] + + T = len(p.iouThrs) + G = len(gt) + D = len(dt) + gtm = np.zeros((T,G)) + dtm = np.zeros((T,D)) + gtIg = np.array([g['_ignore'] for g in gt]) + dtIg = np.zeros((T,D)) + if not len(ious)==0: + for tind, t in enumerate(p.iouThrs): + for dind, d in enumerate(dt): + # information about best match so far (m=-1 -> unmatched) + iou = min([t,1-1e-10]) + m = -1 + for gind, g in enumerate(gt): + # if this gt already matched, and not a crowd, continue + if gtm[tind,gind]>0 and not iscrowd[gind]: + continue + # if dt matched to reg gt, and on ignore gt, stop + if m>-1 and gtIg[m]==0 and gtIg[gind]==1: + break + # continue to next gt unless better match made + if ious[dind,gind] < iou: + continue + # if match successful and best so far, store appropriately + iou=ious[dind,gind] + m=gind + # if match made store id of match for both dt and gt + if m ==-1: + continue + dtIg[tind,dind] = gtIg[m] + dtm[tind,dind] = gt[m]['id'] + gtm[tind,m] = d['id'] + # set unmatched detections outside of area range to ignore + a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt))) + dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0))) + # store results for given image and category + return { + 'image_id': imgId, + 'category_id': catId, + 'aRng': aRng, + 'maxDet': maxDet, + 'dtIds': [d['id'] for d in dt], + 'gtIds': [g['id'] for g in gt], + 'dtMatches': dtm, + 'gtMatches': gtm, + 'dtScores': [d['score'] for d in dt], + 'gtIgnore': gtIg, + 'dtIgnore': dtIg, + } + + def accumulate(self, p = None): + ''' + Accumulate per image evaluation results and store the result in self.eval + :param p: input params for evaluation + :return: None + ''' + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m,
maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: + continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. + inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: + continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + q = np.zeros((R,)) + ss = np.zeros((R,)) + + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + ss[ri] = dtScoresSorted[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + scores[t,:,k,a,m] = np.array(ss) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + def summarize(self): + ''' + Compute and display summary metrics for evaluation results. 
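The header comment of this file sketches the intended call order (`evaluate()` → `accumulate()` → `summarize()`). A concrete sketch of that flow, written against upstream pycocotools, which this vendored `cocoeval.py` follows; both file paths are placeholders, and note that `loadRes` is commented out in the vendored `coco.py` in this PR, so against the vendored modules the detections would need to be wrapped into a COCO object some other way:

```python
# Standard COCO evaluation loop: match detections per image at the 10 IoU
# thresholds, accumulate the [T,R,K,A,M] precision/recall arrays, then print
# the 12 AP/AR summary numbers. File names are placeholders.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("instances_val.json")              # ground-truth annotations
coco_dt = coco_gt.loadRes("detections.json")      # detections in COCO result format
evaluator = COCOeval(coco_gt, coco_dt, iouType="bbox")
evaluator.params.imgIds = sorted(coco_gt.getImgIds())  # optionally restrict the image set
evaluator.evaluate()      # per-image, per-category matching
evaluator.accumulate()    # build the precision/recall tables
evaluator.summarize()     # prints the metrics; they also land in evaluator.stats
```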
+ Note this functin can *only* be applied on the default parameter setting + ''' + def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): + p = self.params + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap==1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval['precision'] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,:,aind,mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval['recall'] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,aind,mind] + if len(s[s>-1])==0: + mean_s = -1 + else: + mean_s = np.mean(s[s>-1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) + return stats + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=.5) + stats[2] = _summarize(1, maxDets=20, iouThr=.75) + stats[3] = _summarize(1, maxDets=20, areaRng='medium') + stats[4] = _summarize(1, maxDets=20, areaRng='large') + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=.5) + stats[7] = _summarize(0, maxDets=20, iouThr=.75) + stats[8] = _summarize(0, maxDets=20, areaRng='medium') + stats[9] = _summarize(0, maxDets=20, areaRng='large') + return stats + if not self.eval: + raise Exception('Please run accumulate() first') + iouType = self.params.iouType + if iouType == 'segm' or iouType == 'bbox': + summarize = _summarizeDets + elif iouType == 'keypoints': + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() + +class Params: + ''' + Params for coco evaluation api + ''' + def setDetParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. 
the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [1, 10, 100] + self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'small', 'medium', 'large'] + self.useCats = 1 + + def setKpParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'medium', 'large'] + self.useCats = 1 + self.kpt_oks_sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 + + def __init__(self, iouType='segm'): + if iouType == 'segm' or iouType == 'bbox': + self.setDetParams() + elif iouType == 'keypoints': + self.setKpParams() + else: + raise Exception('iouType not supported') + self.iouType = iouType + # useSegm is deprecated + self.useSegm = None diff --git a/EISeg/eiseg/util/coco/common/gason.cpp b/EISeg/eiseg/util/coco/common/gason.cpp new file mode 100644 index 0000000000..0f2c00e266 --- /dev/null +++ b/EISeg/eiseg/util/coco/common/gason.cpp @@ -0,0 +1,335 @@ +// https://github.com/vivkin/gason - pulled January 10, 2016 +#include "gason.h" +#include + +#define JSON_ZONE_SIZE 4096 +#define JSON_STACK_SIZE 32 + +const char *jsonStrError(int err) { + switch (err) { +#define XX(no, str) \ + case JSON_##no: \ + return str; + JSON_ERRNO_MAP(XX) +#undef XX + default: + return "unknown"; + } +} + +void *JsonAllocator::allocate(size_t size) { + size = (size + 7) & ~7; + + if (head && head->used + size <= JSON_ZONE_SIZE) { + char *p = (char *)head + head->used; + head->used += size; + return p; + } + + size_t allocSize = sizeof(Zone) + size; + Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? 
JSON_ZONE_SIZE : allocSize); + if (zone == nullptr) + return nullptr; + zone->used = allocSize; + if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { + zone->next = head; + head = zone; + } else { + zone->next = head->next; + head->next = zone; + } + return (char *)zone + sizeof(Zone); +} + +void JsonAllocator::deallocate() { + while (head) { + Zone *next = head->next; + free(head); + head = next; + } +} + +static inline bool isspace(char c) { + return c == ' ' || (c >= '\t' && c <= '\r'); +} + +static inline bool isdelim(char c) { + return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; +} + +static inline bool isdigit(char c) { + return c >= '0' && c <= '9'; +} + +static inline bool isxdigit(char c) { + return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); +} + +static inline int char2int(char c) { + if (c <= '9') + return c - '0'; + return (c & ~' ') - 'A' + 10; +} + +static double string2double(char *s, char **endptr) { + char ch = *s; + if (ch == '-') + ++s; + + double result = 0; + while (isdigit(*s)) + result = (result * 10) + (*s++ - '0'); + + if (*s == '.') { + ++s; + + double fraction = 1; + while (isdigit(*s)) { + fraction *= 0.1; + result += (*s++ - '0') * fraction; + } + } + + if (*s == 'e' || *s == 'E') { + ++s; + + double base = 10; + if (*s == '+') + ++s; + else if (*s == '-') { + ++s; + base = 0.1; + } + + unsigned int exponent = 0; + while (isdigit(*s)) + exponent = (exponent * 10) + (*s++ - '0'); + + double power = 1; + for (; exponent; exponent >>= 1, base *= base) + if (exponent & 1) + power *= base; + + result *= power; + } + + *endptr = s; + return ch == '-' ? -result : result; +} + +static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { + if (!tail) + return node->next = node; + node->next = tail->next; + tail->next = node; + return node; +} + +static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { + if (tail) { + auto head = tail->next; + tail->next = nullptr; + return JsonValue(tag, head); + } + return JsonValue(tag, nullptr); +} + +int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { + JsonNode *tails[JSON_STACK_SIZE]; + JsonTag tags[JSON_STACK_SIZE]; + char *keys[JSON_STACK_SIZE]; + JsonValue o; + int pos = -1; + bool separator = true; + JsonNode *node; + *endptr = s; + + while (*s) { + while (isspace(*s)) { + ++s; + if (!*s) break; + } + *endptr = s++; + switch (**endptr) { + case '-': + if (!isdigit(*s) && *s != '.') { + *endptr = s; + return JSON_BAD_NUMBER; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + o = JsonValue(string2double(*endptr, &s)); + if (!isdelim(*s)) { + *endptr = s; + return JSON_BAD_NUMBER; + } + break; + case '"': + o = JsonValue(JSON_STRING, s); + for (char *it = s; *s; ++it, ++s) { + int c = *it = *s; + if (c == '\\') { + c = *++s; + switch (c) { + case '\\': + case '"': + case '/': + *it = c; + break; + case 'b': + *it = '\b'; + break; + case 'f': + *it = '\f'; + break; + case 'n': + *it = '\n'; + break; + case 'r': + *it = '\r'; + break; + case 't': + *it = '\t'; + break; + case 'u': + c = 0; + for (int i = 0; i < 4; ++i) { + if (isxdigit(*++s)) { + c = c * 16 + char2int(*s); + } else { + *endptr = s; + return JSON_BAD_STRING; + } + } + if (c < 0x80) { + *it = c; + } else if (c < 0x800) { + *it++ = 0xC0 | (c >> 6); + *it = 0x80 | (c & 0x3F); + } else { + *it++ = 0xE0 | (c >> 12); + *it++ = 0x80 | ((c >> 6) & 0x3F); + *it = 0x80 | (c & 0x3F); + } + break; + 
default: + *endptr = s; + return JSON_BAD_STRING; + } + } else if ((unsigned int)c < ' ' || c == '\x7F') { + *endptr = s; + return JSON_BAD_STRING; + } else if (c == '"') { + *it = 0; + ++s; + break; + } + } + if (!isdelim(*s)) { + *endptr = s; + return JSON_BAD_STRING; + } + break; + case 't': + if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_TRUE); + s += 3; + break; + case 'f': + if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_FALSE); + s += 4; + break; + case 'n': + if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_NULL); + s += 3; + break; + case ']': + if (pos == -1) + return JSON_STACK_UNDERFLOW; + if (tags[pos] != JSON_ARRAY) + return JSON_MISMATCH_BRACKET; + o = listToValue(JSON_ARRAY, tails[pos--]); + break; + case '}': + if (pos == -1) + return JSON_STACK_UNDERFLOW; + if (tags[pos] != JSON_OBJECT) + return JSON_MISMATCH_BRACKET; + if (keys[pos] != nullptr) + return JSON_UNEXPECTED_CHARACTER; + o = listToValue(JSON_OBJECT, tails[pos--]); + break; + case '[': + if (++pos == JSON_STACK_SIZE) + return JSON_STACK_OVERFLOW; + tails[pos] = nullptr; + tags[pos] = JSON_ARRAY; + keys[pos] = nullptr; + separator = true; + continue; + case '{': + if (++pos == JSON_STACK_SIZE) + return JSON_STACK_OVERFLOW; + tails[pos] = nullptr; + tags[pos] = JSON_OBJECT; + keys[pos] = nullptr; + separator = true; + continue; + case ':': + if (separator || keys[pos] == nullptr) + return JSON_UNEXPECTED_CHARACTER; + separator = true; + continue; + case ',': + if (separator || keys[pos] != nullptr) + return JSON_UNEXPECTED_CHARACTER; + separator = true; + continue; + case '\0': + continue; + default: + return JSON_UNEXPECTED_CHARACTER; + } + + separator = false; + + if (pos == -1) { + *endptr = s; + *value = o; + return JSON_OK; + } + + if (tags[pos] == JSON_OBJECT) { + if (!keys[pos]) { + if (o.getTag() != JSON_STRING) + return JSON_UNQUOTED_KEY; + keys[pos] = o.toString(); + continue; + } + if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) + return JSON_ALLOCATION_FAILURE; + tails[pos] = insertAfter(tails[pos], node); + tails[pos]->key = keys[pos]; + keys[pos] = nullptr; + } else { + if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) + return JSON_ALLOCATION_FAILURE; + tails[pos] = insertAfter(tails[pos], node); + } + tails[pos]->value = o; + } + return JSON_BREAKING_BAD; +} diff --git a/EISeg/eiseg/util/coco/common/gason.h b/EISeg/eiseg/util/coco/common/gason.h new file mode 100644 index 0000000000..2e728d9f70 --- /dev/null +++ b/EISeg/eiseg/util/coco/common/gason.h @@ -0,0 +1,136 @@ +// https://github.com/vivkin/gason - pulled January 10, 2016 +#pragma once + +#include +#include +#include + +enum JsonTag { + JSON_NUMBER = 0, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT, + JSON_TRUE, + JSON_FALSE, + JSON_NULL = 0xF +}; + +struct JsonNode; + +#define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL +#define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL +#define JSON_VALUE_TAG_MASK 0xF +#define JSON_VALUE_TAG_SHIFT 47 + +union JsonValue { + uint64_t ival; + double fval; + + JsonValue(double x) + : fval(x) { + } + JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { + assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); + ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; + } + bool 
isDouble() const { + return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; + } + JsonTag getTag() const { + return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); + } + uint64_t getPayload() const { + assert(!isDouble()); + return ival & JSON_VALUE_PAYLOAD_MASK; + } + double toNumber() const { + assert(getTag() == JSON_NUMBER); + return fval; + } + char *toString() const { + assert(getTag() == JSON_STRING); + return (char *)getPayload(); + } + JsonNode *toNode() const { + assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); + return (JsonNode *)getPayload(); + } +}; + +struct JsonNode { + JsonValue value; + JsonNode *next; + char *key; +}; + +struct JsonIterator { + JsonNode *p; + + void operator++() { + p = p->next; + } + bool operator!=(const JsonIterator &x) const { + return p != x.p; + } + JsonNode *operator*() const { + return p; + } + JsonNode *operator->() const { + return p; + } +}; + +inline JsonIterator begin(JsonValue o) { + return JsonIterator{o.toNode()}; +} +inline JsonIterator end(JsonValue) { + return JsonIterator{nullptr}; +} + +#define JSON_ERRNO_MAP(XX) \ + XX(OK, "ok") \ + XX(BAD_NUMBER, "bad number") \ + XX(BAD_STRING, "bad string") \ + XX(BAD_IDENTIFIER, "bad identifier") \ + XX(STACK_OVERFLOW, "stack overflow") \ + XX(STACK_UNDERFLOW, "stack underflow") \ + XX(MISMATCH_BRACKET, "mismatch bracket") \ + XX(UNEXPECTED_CHARACTER, "unexpected character") \ + XX(UNQUOTED_KEY, "unquoted key") \ + XX(BREAKING_BAD, "breaking bad") \ + XX(ALLOCATION_FAILURE, "allocation failure") + +enum JsonErrno { +#define XX(no, str) JSON_##no, + JSON_ERRNO_MAP(XX) +#undef XX +}; + +const char *jsonStrError(int err); + +class JsonAllocator { + struct Zone { + Zone *next; + size_t used; + } *head = nullptr; + +public: + JsonAllocator() = default; + JsonAllocator(const JsonAllocator &) = delete; + JsonAllocator &operator=(const JsonAllocator &) = delete; + JsonAllocator(JsonAllocator &&x) : head(x.head) { + x.head = nullptr; + } + JsonAllocator &operator=(JsonAllocator &&x) { + head = x.head; + x.head = nullptr; + return *this; + } + ~JsonAllocator() { + deallocate(); + } + void *allocate(size_t size); + void deallocate(); +}; + +int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); diff --git a/EISeg/eiseg/util/coco/common/maskApi.c b/EISeg/eiseg/util/coco/common/maskApi.c new file mode 100644 index 0000000000..917dc28a3f --- /dev/null +++ b/EISeg/eiseg/util/coco/common/maskApi.c @@ -0,0 +1,231 @@ +/************************************************************************** +* Microsoft COCO Toolbox. version 2.0 +* Data, paper, and tutorials available at: http://mscoco.org/ +* Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +* Licensed under the Simplified BSD License [see coco/license.txt] +**************************************************************************/ +#include "maskApi.h" +#include +#include + +uint umin( uint a, uint b ) { return (ab) ? 
a : b; } + +void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { + R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); + siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; +} + +void rleFree( RLE *R ) { + free(R->cnts); R->cnts=0; +} + +void rlesInit( RLE **R, siz n ) { + siz i; *R = (RLE*) malloc(sizeof(RLE)*n); + for(i=0; i0 ) { + c=umin(ca,cb); cc+=c; ct=0; + ca-=c; if(!ca && a0) { + crowd=iscrowd!=NULL && iscrowd[g]; + if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } + siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; + ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; + cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; + while( ct>0 ) { + c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; + ca-=c; if(!ca && athr) keep[j]=0; + } + } +} + +void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { + double h, w, i, u, ga, da; siz g, d; int crowd; + for( g=0; gthr) keep[j]=0; + } + } +} + +void rleToBbox( const RLE *R, BB bb, siz n ) { + siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); + if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } + s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; + if(dx>=dy) for( d=0; d<=dx; d++ ) { + t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; + } else for( d=0; d<=dy; d++ ) { + t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; + } + } + /* get points along y-boundary and downsample */ + free(x); free(y); k=m; m=0; double xd, yd; + x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); + for( j=1; jw-1 ) continue; + yd=(double)(v[j]h) yd=h; yd=ceil(yd); + x[m]=(int) xd; y[m]=(int) yd; m++; + } + /* compute rle encoding given y-boundary points */ + k=m; a=malloc(sizeof(uint)*(k+1)); + for( j=0; j0) b[m++]=a[j++]; else { + j++; if(jm, p=0; long x; int more; + char *s=malloc(sizeof(char)*m*6); + for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; + while( more ) { + char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; + if(more) c |= 0x20; c+=48; s[p++]=c; + } + } + s[p]=0; return s; +} + +void rleFrString( RLE *R, char *s, siz h, siz w ) { + siz m=0, p=0, k; long x; int more; uint *cnts; + while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; + while( s[p] ) { + x=0; k=0; more=1; + while( more ) { + char c=s[p]-48; x |= (c & 0x1f) << 5*k; + more = c & 0x20; p++; k++; + if(!more && (c & 0x10)) x |= -1 << 5*k; + } + if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; + } + rleInit(R,h,w,m,cnts); free(cnts); +} diff --git a/EISeg/eiseg/util/coco/common/maskApi.h b/EISeg/eiseg/util/coco/common/maskApi.h new file mode 100644 index 0000000000..ebc7892da3 --- /dev/null +++ b/EISeg/eiseg/util/coco/common/maskApi.h @@ -0,0 +1,60 @@ +/************************************************************************** +* Microsoft COCO Toolbox. version 2.0 +* Data, paper, and tutorials available at: http://mscoco.org/ +* Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +* Licensed under the Simplified BSD License [see coco/license.txt] +**************************************************************************/ +#pragma once + +typedef unsigned int uint; +typedef unsigned long siz; +typedef unsigned char byte; +typedef double* BB; +typedef struct { siz h, w, m; uint *cnts; } RLE; + +/* Initialize/destroy RLE. */ +void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); +void rleFree( RLE *R ); + +/* Initialize/destroy RLE array. */ +void rlesInit( RLE **R, siz n ); +void rlesFree( RLE **R, siz n ); + +/* Encode binary masks using RLE. 
*/ +void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); + +/* Decode binary masks encoded via RLE. */ +void rleDecode( const RLE *R, byte *mask, siz n ); + +/* Compute union or intersection of encoded masks. */ +void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); + +/* Compute area of encoded masks. */ +void rleArea( const RLE *R, siz n, uint *a ); + +/* Compute intersection over union between masks. */ +void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); + +/* Compute non-maximum suppression between bounding masks */ +void rleNms( RLE *dt, siz n, uint *keep, double thr ); + +/* Compute intersection over union between bounding boxes. */ +void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); + +/* Compute non-maximum suppression between bounding boxes */ +void bbNms( BB dt, siz n, uint *keep, double thr ); + +/* Get bounding boxes surrounding encoded masks. */ +void rleToBbox( const RLE *R, BB bb, siz n ); + +/* Convert bounding boxes to encoded masks. */ +void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); + +/* Convert polygon to encoded mask. */ +void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); + +/* Get compressed string representation of encoded mask. */ +char* rleToString( const RLE *R ); + +/* Convert from compressed string representation of encoded mask. */ +void rleFrString( RLE *R, char *s, siz h, siz w ); diff --git a/EISeg/eiseg/util/coco/mask.py b/EISeg/eiseg/util/coco/mask.py new file mode 100644 index 0000000000..40853ba0d3 --- /dev/null +++ b/EISeg/eiseg/util/coco/mask.py @@ -0,0 +1,103 @@ +__author__ = 'tsungyi' + +import pycocotools._mask as _mask + +# Interface for manipulating masks stored in RLE format. +# +# RLE is a simple yet efficient format for storing binary masks. RLE +# first divides a vector (or vectorized image) into a series of piecewise +# constant regions and then for each piece simply stores the length of +# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would +# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] +# (note that the odd counts are always the numbers of zeros). Instead of +# storing the counts directly, additional compression is achieved with a +# variable bitrate representation based on a common scheme called LEB128. +# +# Compression is greatest given large piecewise constant regions. +# Specifically, the size of the RLE is proportional to the number of +# *boundaries* in M (or for an image the number of boundaries in the y +# direction). Assuming fairly simple shapes, the RLE representation is +# O(sqrt(n)) where n is number of pixels in the object. Hence space usage +# is substantially lower, especially for large simple objects (large n). +# +# Many common operations on masks can be computed directly using the RLE +# (without need for decoding). This includes computations such as area, +# union, intersection, etc. All of these operations are linear in the +# size of the RLE, in other words they are O(sqrt(n)) where n is the area +# of the object. Computing these operations on the original mask is O(n). +# Thus, using the RLE can result in substantial computational savings. +# +# The following API functions are defined: +# encode - Encode binary masks using RLE. +# decode - Decode binary masks encoded via RLE. +# merge - Compute union or intersection of encoded masks. +# iou - Compute intersection over union between masks. +# area - Compute area of encoded masks. +# toBbox - Get bounding boxes surrounding encoded masks. 
+# frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. +# +# Usage: +# Rs = encode( masks ) +# masks = decode( Rs ) +# R = merge( Rs, intersect=false ) +# o = iou( dt, gt, iscrowd ) +# a = area( Rs ) +# bbs = toBbox( Rs ) +# Rs = frPyObjects( [pyObjects], h, w ) +# +# In the API the following formats are used: +# Rs - [dict] Run-length encoding of binary masks +# R - dict Run-length encoding of binary mask +# masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) +# iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore +# bbs - [nx4] Bounding box(es) stored as [x y w h] +# poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) +# dt,gt - May be either bounding boxes or encoded masks +# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). +# +# Finally, a note about the intersection over union (iou) computation. +# The standard iou of a ground truth (gt) and detected (dt) object is +# iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) +# For "crowd" regions, we use a modified criteria. If a gt object is +# marked as "iscrowd", we allow a dt to match any subregion of the gt. +# Choosing gt' in the crowd gt that best matches the dt can be done using +# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing +# iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) +# For crowd gt regions we use this modified criteria above for the iou. +# +# To compile run "python setup.py build_ext --inplace" +# Please do not contact us for help with compiling. +# +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
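To make the column-major uint8 requirement above concrete, here is a small round trip through these helpers, shown with the upstream `pycocotools.mask` import that this file is copied from (the vendored copy itself imports pycocotools' compiled `_mask` extension and exposes the same `encode`/`decode`/`area`/`toBbox` functions):

```python
# Encode a binary mask to compressed RLE, query it, and decode it back.
import numpy as np
from pycocotools import mask as maskUtils

m = np.zeros((64, 64), dtype=np.uint8)
m[16:48, 16:48] = 1                             # a 32x32 foreground square
rle = maskUtils.encode(np.asfortranarray(m))    # encode() wants column-major uint8
print(maskUtils.area(rle))                      # 1024 foreground pixels
print(maskUtils.toBbox(rle))                    # [16. 16. 32. 32.] as [x, y, w, h]
restored = maskUtils.decode(rle)
assert (restored == m).all()
```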
+# Licensed under the Simplified BSD License [see coco/license.txt] + +iou = _mask.iou +merge = _mask.merge +frPyObjects = _mask.frPyObjects + +def encode(bimask): + if len(bimask.shape) == 3: + return _mask.encode(bimask) + elif len(bimask.shape) == 2: + h, w = bimask.shape + return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] + +def decode(rleObjs): + if type(rleObjs) == list: + return _mask.decode(rleObjs) + else: + return _mask.decode([rleObjs])[:,:,0] + +def area(rleObjs): + if type(rleObjs) == list: + return _mask.area(rleObjs) + else: + return _mask.area([rleObjs])[0] + +def toBbox(rleObjs): + if type(rleObjs) == list: + return _mask.toBbox(rleObjs) + else: + return _mask.toBbox([rleObjs])[0] \ No newline at end of file diff --git a/EISeg/eiseg/util/colormap.py b/EISeg/eiseg/util/colormap.py new file mode 100644 index 0000000000..81694b4369 --- /dev/null +++ b/EISeg/eiseg/util/colormap.py @@ -0,0 +1,26 @@ +import os.path as osp +import random + +from eiseg import pjpath + + +class ColorMap(object): + def __init__(self, color_path, shuffle=False): + self.colors = [] + self.index = 0 + self.usedColors = [] + colors = open(color_path, "r").readlines() + if shuffle: + random.shuffle(colors) + self.colors = [[int(x) for x in c.strip().split(",")] for c in colors] + + def get_color(self): + color = self.colors[self.index] + self.index = (self.index + 1) % len(self) + return color + + def __len__(self): + return len(self.colors) + + +colorMap = ColorMap(osp.join(pjpath, "config/colormap.txt")) diff --git a/contrib/EISeg/eiseg/util/config.py b/EISeg/eiseg/util/config.py similarity index 58% rename from contrib/EISeg/eiseg/util/config.py rename to EISeg/eiseg/util/config.py index 8e3ca4caf7..a45a0b40f3 100644 --- a/contrib/EISeg/eiseg/util/config.py +++ b/EISeg/eiseg/util/config.py @@ -1,18 +1,31 @@ import yaml import os.path as osp +import os +from eiseg import pjpath -def parseConfigs(path): + +def parse_configs(path): if not path or not osp.exists(path): return with open(path, "r", encoding="utf-8") as f: return yaml.load(f.read(), Loader=yaml.FullLoader) -def saveConfigs(path, config): - if not osp.exists(osp.basename(path)): - print("文件夹不存在") - return +def save_configs(path=None, config=None, actions=None): + if not path: + path = osp.join(pjpath, "config/config.yaml") + if not osp.exists(path): + # os.makedirs(osp.basename(path)) + # windows无法使用mknod + f = open(path, 'w+') + f.close() + if not config: + config = {} + if actions: + config["shortcut"] = {} + for action in actions: + config["shortcut"][action.data()] = action.shortcut().toString() with open(path, "w", encoding="utf-8") as f: yaml.dump(config, f) diff --git a/contrib/EISeg/eiseg/util/exp_imports/default.py b/EISeg/eiseg/util/exp_imports/default.py similarity index 100% rename from contrib/EISeg/eiseg/util/exp_imports/default.py rename to EISeg/eiseg/util/exp_imports/default.py diff --git a/EISeg/eiseg/util/label.py b/EISeg/eiseg/util/label.py new file mode 100644 index 0000000000..89b796ac6c --- /dev/null +++ b/EISeg/eiseg/util/label.py @@ -0,0 +1,95 @@ +import os +import os.path as osp + +from . 
import colorMap + + +class Label: + def __init__(self, idx=None, name=None, color=None): + self.idx = idx + self.name = name + self.color = color + + def __repr__(self): + return f"{self.idx} {self.name} {self.color}" + + +class LabelList(object): + def __init__(self, labels: dict = None): + self.labelList = [] + if labels is not None: + for lab in labels: + color = lab.get("color", colorMap.get_color()) + self.add(lab["id"], lab["name"], color) + + def add(self, idx, name, color): + self.labelList.append(Label(idx, name, color)) + + def remove(self, index): + for idx, lab in enumerate(self.labelList): + if lab.idx == index: + del self.labelList[idx] + break + # del self.labelList[index] + + def clear(self): + self.labelList = [] + + def toint(self, seq): + if isinstance(seq, list): + for i in range(len(seq)): + try: + seq[i] = int(seq[i]) + except ValueError: + pass + else: + seq = int(seq) + return seq + + def readLabel(self, path): + if not osp.exists(path): + return [] + with open(path, "r", encoding="utf-8") as f: + labels = f.readlines() + labelList = [] + for lab in labels: + lab = lab.replace("\n", "").strip(" ").split(" ") + if len(lab) != 2 and len(lab) != 5: + print(f"{lab} 标签不合法") + continue + label = Label(self.toint(lab[0]), str(lab[1]), self.toint(lab[2:])) + labelList.append(label) + self.labelList = labelList + + def saveLabel(self, path): + if not path or not osp.exists(osp.dirname(path)): + print("label path don't exist") + return + with open(path, "w", encoding="utf-8") as f: + for label in self.labelList: + print(label.idx, end=" ", file=f) + print(label.name, end=" ", file=f) + for idx in range(3): + print(label.color[idx], end=" ", file=f) + print(file=f) + + def getLabelById(self, labelIdx): + for lab in self.labelList: + if lab.idx == labelIdx: + return lab + + def __repr__(self): + return str(self.labelList) + + def __getitem__(self, index): + return self.labelList[index] + + def __len__(self): + return len(self.labelList) + + @property + def colors(self): + cols = [] + for lab in self.labelList: + cols.append(lab.color) + return cols diff --git a/EISeg/eiseg/util/language.py b/EISeg/eiseg/util/language.py new file mode 100644 index 0000000000..b3b6c4a576 --- /dev/null +++ b/EISeg/eiseg/util/language.py @@ -0,0 +1,40 @@ +import os.path as osp +import re +from eiseg import pjpath +from collections import defaultdict +import json +from urllib import parse +import requests + + +class TransUI(object): + def __init__(self, is_trans=False): + super().__init__() + self.trans_dict = defaultdict(dict) + with open(osp.join(pjpath, "config/zh_CN.EN"), "r", encoding="utf-8") as f: + texts = f.readlines() + for txt in texts: + strs = txt.split("@") + self.trans_dict[strs[0].strip()] = strs[1].strip() + self.is_trans = is_trans + self.youdao_url = "http://fanyi.youdao.com/translate?&doctype=json&type=AUTO&i=" + + def put(self, zh_CN): + if self.is_trans == False: + return zh_CN + else: + try: + return str(self.trans_dict[zh_CN]) + except: + return zh_CN + + # 联网动态翻译 + def tr(self, zh_CN): + try: + tr_url = self.youdao_url + parse.quote(zh_CN) + response = requests.get(tr_url) + js = json.loads(response.text) + result_EN = js["translateResult"][0][0]["tgt"] + return str(result_EN) + except: + return zh_CN \ No newline at end of file diff --git a/EISeg/eiseg/util/manager.py b/EISeg/eiseg/util/manager.py new file mode 100644 index 0000000000..32127422bc --- /dev/null +++ b/EISeg/eiseg/util/manager.py @@ -0,0 +1,77 @@ +import inspect +from collections.abc import Sequence + + +class 
ComponentManager: + def __init__(self, name=None): + self._components_dict = dict() + self._name = name + + def __len__(self): + return len(self._components_dict) + + def __repr__(self): + name_str = self._name if self._name else self.__class__.__name__ + return "{}:{}".format(name_str, list(self._components_dict.keys())) + + def __getitem__(self, item): + if isinstance(item, int): + if item >= len(self): + raise KeyError(f"指定的下标 {item} 在长度为 {len(self)} 的 {self} 中越界") + return list(self._components_dict.values())[item] + if item not in self._components_dict.keys(): + raise KeyError(f"{self} 中不存在 {item}") + return self._components_dict[item] + + def __iter__(self): + for val in self._components_dict.values(): + yield val + + def keys(self): + return list(self._components_dict.keys()) + + def idx(self, item): + for idx, val in enumerate(self.keys()): + if val == item: + return idx + raise KeyError(f"{item} is not in {self}") + + @property + def components_dict(self): + return self._components_dict + + @property + def name(self): + return self._name + + def _add_single_component(self, component): + # Currently only support class or function type + if not (inspect.isclass(component) or inspect.isfunction(component)): + raise TypeError( + "Expect class/function type, but received {}".format(type(component)) + ) + + # Obtain the internal name of the component + component_name = component.__name__ + + # Check whether the component was added already + if component_name in self._components_dict.keys(): + raise KeyError("{} exists already!".format(component_name)) + else: + # Take the internal name of the component as its key + self._components_dict[component_name] = component + + def add_component(self, components): + # Check whether the type is a sequence + if isinstance(components, Sequence): + for component in components: + self._add_single_component(component) + else: + component = components + self._add_single_component(component) + + return components + + +MODELS = ComponentManager("models") +ACTIONS = ComponentManager("actions") diff --git a/contrib/EISeg/eiseg/util/misc.py b/EISeg/eiseg/util/misc.py similarity index 100% rename from contrib/EISeg/eiseg/util/misc.py rename to EISeg/eiseg/util/misc.py diff --git a/EISeg/eiseg/util/polygon.py b/EISeg/eiseg/util/polygon.py new file mode 100644 index 0000000000..9baa8dc9ba --- /dev/null +++ b/EISeg/eiseg/util/polygon.py @@ -0,0 +1,78 @@ +from enum import Enum + +import cv2 +from math import sqrt +import matplotlib.pyplot as plt + + +class Instructions(Enum): + No_Instruction = 0 + Polygon_Instruction = 1 + + +def get_polygon(label, sample=2): + results = cv2.findContours( + image=label, mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_TC89_KCOS + ) # 获取内外边界,用RETR_TREE更好表示 + cv2_v = cv2.__version__.split(".")[0] + contours = results[1] if cv2_v == "3" else results[0] # 边界 + hierarchys = results[2] if cv2_v == "3" else results[1] # 隶属信息 + if len(contours) != 0: # 可能出现没有边界的情况 + polygons = [] + relas = [] + for contour, hierarchy in zip(contours, hierarchys[0]): + out = cv2.approxPolyDP(contour, sample, True) + # 判断自己,如果是子对象就不管自己是谁 + if hierarchy[2] == -1: + own = None + else: + if hierarchy[0] == -1 and hierarchy[1] == -1: + own = 0 + elif hierarchy[0] != -1 and hierarchy[1] == -1: + own = hierarchy[0] - 1 + else: + own = hierarchy[1] + 1 + rela = (own, # own + hierarchy[-1] if hierarchy[-1] != -1 else None) # parent + polygon = [] + for p in out: + polygon.append(p[0]) + polygons.append(polygon) # 边界 + relas.append(rela) # 关系 + for i in 
range(len(relas)): + if relas[i][1] != None: # 有父母 + for j in range(len(relas)): + if relas[j][0] == relas[i][1]: # i的父母就是j(i是j的内圈) + min_i, min_o = _find_min_point(polygons[i], polygons[j]) + # 改变顺序 + s_pj = polygons[j][: min_o] + polygons[j] = polygons[j][min_o:] + polygons[j].extend(s_pj) + s_pi = polygons[i][: min_i] + polygons[i] = polygons[i][min_i:] + polygons[i].extend(s_pi) + # 连接 + polygons[j].append(polygons[j][0]) # 闭合 + polygons[j].extend(polygons[i]) + polygons[j].append(polygons[i][0]) # 闭合 + polygons[i] = None + polygons = list(filter(None, polygons)) # 清除加到外圈的内圈多边形 + return polygons + else: + print("没有标签范围,无法生成边界") + return None + + +def _find_min_point(i_list, o_list): + min_dis = 1e7 + idx_i = -1 + idx_o = -1 + for i in range(len(i_list)): + for o in range(len(o_list)): + dis = sqrt((i_list[i][0] - o_list[o][0]) ** 2 + \ + (i_list[i][1] - o_list[o][1]) ** 2) + if dis < min_dis: + min_dis = dis + idx_i = i + idx_o = o + return idx_i, idx_o \ No newline at end of file diff --git a/contrib/EISeg/eiseg/util/qt.py b/EISeg/eiseg/util/qt.py similarity index 68% rename from contrib/EISeg/eiseg/util/qt.py rename to EISeg/eiseg/util/qt.py index b3903c0c51..fa36532980 100644 --- a/contrib/EISeg/eiseg/util/qt.py +++ b/EISeg/eiseg/util/qt.py @@ -3,15 +3,22 @@ import numpy as np +from eiseg import pjpath from qtpy import QtCore from qtpy import QtGui from qtpy import QtWidgets +from .config import parse_configs - +shortcuts = parse_configs(osp.join(pjpath, "config/config.yaml"))["shortcut"] here = osp.dirname(osp.abspath(__file__)) def newIcon(icon): + if isinstance(icon, list) or isinstance(icon, tuple): + pixmap = QtGui.QPixmap(100, 100) + c = icon + pixmap.fill(QtGui.QColor(c[0], c[1], c[2])) + return QtGui.QIcon(pixmap) icons_dir = osp.join(here, "../resource") return QtGui.QIcon(osp.join(":/", icons_dir, f"{icon}.png")) @@ -29,7 +36,7 @@ def newAction( parent, text, slot=None, - shortcut=None, + shortcutName=None, icon=None, tip=None, checkable=False, @@ -38,10 +45,12 @@ def newAction( ): """Create a new action and assign callbacks, shortcuts, etc.""" a = QtWidgets.QAction(text, parent) + a.setData(shortcutName) # a = QtWidgets.QAction("", parent) if icon is not None: a.setIconText(text.replace(" ", "\n")) a.setIcon(newIcon(icon)) + shortcut = shortcuts.get(shortcutName, None) if shortcut is not None: if isinstance(shortcut, (list, tuple)): a.setShortcuts(shortcut) @@ -77,23 +86,21 @@ class struct(object): def __init__(self, **kwargs): self.__dict__.update(kwargs) + def __len__(self): + return len(self.__dict__) + + def append(self, action): + if isinstance(action, QtWidgets.QAction): + self.__dict__.update({action.data(): action}) -# def distance(p): -# return sqrt(p.x() * p.x() + p.y() * p.y()) + def __iter__(self): + return list(self.__dict__.values()).__iter__() + def __getitem__(self, idx): + return list(self.__dict__.values())[idx] -# def distancetoline(point, line): -# p1, p2 = line -# p1 = np.array([p1.x(), p1.y()]) -# p2 = np.array([p2.x(), p2.y()]) -# p3 = np.array([point.x(), point.y()]) -# if np.dot((p3 - p1), (p2 - p1)) < 0: -# return np.linalg.norm(p3 - p1) -# if np.dot((p3 - p2), (p1 - p2)) < 0: -# return np.linalg.norm(p3 - p2) -# if np.linalg.norm(p2 - p1) == 0: -# return 0 -# return np.linalg.norm(np.cross(p2 - p1, p1 - p3)) / np.linalg.norm(p2 - p1) + def get(self, name): + return self.__dict__[name] def fmtShortcut(text): diff --git a/contrib/EISeg/eiseg/util/serialization.py b/EISeg/eiseg/util/serialization.py similarity index 100% rename from 
contrib/EISeg/eiseg/util/serialization.py rename to EISeg/eiseg/util/serialization.py diff --git a/EISeg/eiseg/util/translate/English.qm b/EISeg/eiseg/util/translate/English.qm new file mode 100644 index 0000000000..3e373295dc Binary files /dev/null and b/EISeg/eiseg/util/translate/English.qm differ diff --git a/contrib/EISeg/eiseg/util/vis.py b/EISeg/eiseg/util/vis.py similarity index 88% rename from contrib/EISeg/eiseg/util/vis.py rename to EISeg/eiseg/util/vis.py index 7fa1a7bfce..6b8b7c7570 100644 --- a/contrib/EISeg/eiseg/util/vis.py +++ b/EISeg/eiseg/util/vis.py @@ -26,20 +26,6 @@ def visualize_instances( @lru_cache(maxsize=16) def get_palette(num_cls): return np.array([[0, 0, 0], [128, 0, 0], [0, 128, 0], [0, 0, 128]]) - palette = np.zeros(3 * num_cls, dtype=np.int32) - - for j in range(0, num_cls): - lab = j - i = 0 - - while lab > 0: - palette[j * 3 + 0] |= ((lab >> 0) & 1) << (7 - i) - palette[j * 3 + 1] |= ((lab >> 1) & 1) << (7 - i) - palette[j * 3 + 2] |= ((lab >> 2) & 1) << (7 - i) - i = i + 1 - lab >>= 3 - - return palette.reshape((-1, 3)) def visualize_mask(mask, num_cls): @@ -134,8 +120,6 @@ def draw_with_blend_and_clicks( ) result = result.astype(np.uint8) - # result = (result * (1 - alpha) + alpha * rgb_mask).astype(np.uint8) - if clicks_list is not None and len(clicks_list) > 0: pos_points = [click.coords for click in clicks_list if click.is_positive] neg_points = [click.coords for click in clicks_list if not click.is_positive] diff --git a/EISeg/eiseg/widget/__init__.py b/EISeg/eiseg/widget/__init__.py new file mode 100644 index 0000000000..225ee2db10 --- /dev/null +++ b/EISeg/eiseg/widget/__init__.py @@ -0,0 +1,9 @@ +from .shortcut import ShortcutWindow +from .line import LineItem +from .grip import GripItem +from .bbox import BBoxAnnotation +from .polygon import PolygonAnnotation +from .scene import AnnotationScene +from .view import AnnotationView +from .create import * +from .table import TableWidget \ No newline at end of file diff --git a/EISeg/eiseg/widget/bbox.py b/EISeg/eiseg/widget/bbox.py new file mode 100644 index 0000000000..41d350f14b --- /dev/null +++ b/EISeg/eiseg/widget/bbox.py @@ -0,0 +1,133 @@ +from qtpy import QtWidgets, QtGui, QtCore +from qtpy.QtCore import Qt + + +# Note, bbox annotation is more convenient than the default boundingBox generated by QGrpaphicItem +class BBoxAnnotation(QtWidgets.QGraphicsPathItem): + def __init__(self, + labelIndex, + polyline, + borderColor=[0, 0, 255], + cocoIndex=None, + parent=None, + ): + super(BBoxAnnotation, self).__init__(parent) + self.polyline = polyline + self.corner_points = [] + self.upper_right = QtCore.QPointF() + self.bottom_left = QtCore.QPointF() + self.w = -1.0 + self.h = -1.0 + + self.parent = parent + self.is_added = False + if self.parent is not None: + self.is_added = True + self.labelIndex = labelIndex + self.coco_id = cocoIndex + self.bbox_hovering = True + + # set rendering attributes + self.setZValue(10) + + # b = borderColor + # self.borderColor = QtGui.QColor(b[0], b[1], b[2]) + self.borderColor = QtGui.QColor(128,128,128) + self.borderColor.setAlphaF(0.8) + pen = QtGui.QPen(self.borderColor, 1.2) + pen.setStyle(Qt.DashDotLine) + self.setPen(pen) + + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, False) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsMovable, False) + self.setFlag(QtWidgets.QGraphicsItem.ItemSendsGeometryChanges, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, False) + self.setAcceptHoverEvents(False) + # 
self.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) + + @property + def scnenePoints(self): + # return 4 corner points + raise Exception("Not Implemented Yet!") + + def setAnning(self, isAnning=True): + raise Exception("Not Implemented Yet!") + + def remove(self): + raise Exception("Not Implemented Yet!") + + # ===== generate geometry info + + def create_corners(self): + bbox_rect_geo = self.polyline.boundingRect() + self.bottom_left = bbox_rect_geo.bottomLeft() + self.upper_right = bbox_rect_geo.topRight() + self.corner_points.clear() + self.corner_points.extend([ + self.bottom_left, + bbox_rect_geo.topLeft(), + self.upper_right, + bbox_rect_geo.bottomRight() + ]) + self.w = self.corner_points[3].x() - self.corner_points[1].x() + self.h = self.corner_points[3].y() - self.corner_points[1].y() + + if self.corner_points[1].x() > 512 or self.corner_points[1].x() + self.w > 512: + pass + if self.corner_points[1].y() > 512 or self.corner_points[1].y() + self.h > 512: + pass + return self.corner_points + + def create_lines(self): + pass + + # ===== graphic interface to update in scene tree + + def update(self): + l = len(self.polyline.points) + # print("up L:", l, " is_added:", self.is_added) + if l < 3: + if self.is_added: + self.remove_from_scene() + else: # 大于三个点就可以更新,小于三个点删除多边形 + if self.is_added: + self.add_to_scene() + else: + path_geo = QtGui.QPainterPath() + self.create_corners() + path_geo.moveTo(self.corner_points[0]) + for i in range(4): + path_geo.lineTo(self.corner_points[(i + 1) % 4]) + self.setPath(QtGui.QPainterPath(path_geo)) + pass + pass + pass + + def add_to_scene(self): + # self.parentItem().scene().addItem(self) + self.setParentItem(self.parent) + self.is_added = True + + def remove_from_scene(self): + # self.parentItem().scene().removeItem(self) + self.setParentItem(None) + self.is_added = False + + # ===== annotation info + + # @return : [x, y, w, h] + def to_array(self): + np_array = [self._round(self.corner_points[1].x()), + self._round(self.corner_points[1].y()), # topLeft + self._round(self.w), self._round(self.h)] + return np_array + + def _round(self, number, ind=0): + nint, ndec = str(number).split(".") + res = float(nint + "." 
+ ndec[:ind]) + if res <= 0: + res = .0 + return res + + def __del__(self): + self.corner_points.clear() \ No newline at end of file diff --git a/EISeg/eiseg/widget/create.py b/EISeg/eiseg/widget/create.py new file mode 100644 index 0000000000..b0fb05e5df --- /dev/null +++ b/EISeg/eiseg/widget/create.py @@ -0,0 +1,109 @@ +from qtpy.QtWidgets import QDockWidget +from qtpy import QtCore, QtGui, QtWidgets +from qtpy.QtCore import Qt + + +## 创建文本 +def create_text(parent, text_name=None, text_text=None): + text = QtWidgets.QLabel(parent) + if text_name is not None: + text.setObjectName(text_name) + if text_text is not None: + text.setText(text_text) + return text + + +## 创建按钮 +def create_button(parent, btn_name, btn_text, ico_path=None, curt=None): + # 创建和设置按钮 + sizePolicy = QtWidgets.QSizePolicy( + QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed + ) + min_size = QtCore.QSize(0, 40) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + btn = QtWidgets.QPushButton(parent) + sizePolicy.setHeightForWidth(btn.sizePolicy().hasHeightForWidth()) + btn.setSizePolicy(sizePolicy) + btn.setMinimumSize(min_size) + btn.setObjectName(btn_name) + if ico_path is not None: + btn.setIcon(QtGui.QIcon(ico_path)) + btn.setText(btn_text) + if curt is not None: + btn.setShortcut(curt) + return btn + + +## 创建滑块区域 +def create_slider( + parent, + sld_name, + text_name, + text, + default_value=50, + max_value=100, + text_rate=0.01, +): + Region = QtWidgets.QHBoxLayout() + lab = create_text(parent, None, text) + Region.addWidget(lab) + labShow = create_text(parent, text_name, str(default_value * text_rate)) + Region.addWidget(labShow) + Region.setStretch(0, 1) + Region.setStretch(1, 10) + sld = QtWidgets.QSlider(parent) + sld.setMaximum(max_value) # 好像只能整数的,这里是扩大了10倍,1 . 
10 + sld.setProperty("value", default_value) + sld.setOrientation(QtCore.Qt.Horizontal) + sld.setObjectName(sld_name) + sld.setStyleSheet( + """ + QSlider::sub-page:horizontal { + background: #9999F1 + } + QSlider::handle:horizontal + { + background: #3334E3; + width: 12px; + border-radius: 4px; + } + """ + ) + sld.textLab = labShow + return sld, Region + + +class DockWidget(QDockWidget): + def __init__(self, parent, name, text): + super().__init__(parent=parent) + self.setObjectName(name) + self.setAllowedAreas(Qt.RightDockWidgetArea | Qt.LeftDockWidgetArea) + # 感觉不给关闭好点。可以在显示里面取消显示。有关闭的话显示里面的enable还能判断修改,累了 + self.setFeatures( + QDockWidget.DockWidgetMovable | + QDockWidget.DockWidgetFloatable + ) + sizePolicy = QtWidgets.QSizePolicy( + QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred + ) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + sizePolicy.setHeightForWidth(self.sizePolicy().hasHeightForWidth()) + self.setSizePolicy(sizePolicy) + self.setMinimumWidth(230) + self.setWindowTitle(text) + self.setStyleSheet("QDockWidget { background-color:rgb(204,204,248); }") + self.topLevelChanged.connect(self.changeBackColor) + + def changeBackColor(self, isFloating): + if isFloating: + self.setStyleSheet("QDockWidget { background-color:rgb(255,255,255); }") + else: + self.setStyleSheet("QDockWidget { background-color:rgb(204,204,248); }") + +## 创建dock +def creat_dock(parent, name, text, widget): + dock = DockWidget(parent, name, text) + dock.setWidget(widget) + return dock \ No newline at end of file diff --git a/EISeg/eiseg/widget/grip.py b/EISeg/eiseg/widget/grip.py new file mode 100644 index 0000000000..885c2b1286 --- /dev/null +++ b/EISeg/eiseg/widget/grip.py @@ -0,0 +1,77 @@ +from qtpy import QtWidgets, QtGui, QtCore + +# BUG: item 不能移出图片的范围,需要限制起来 +class GripItem(QtWidgets.QGraphicsPathItem): + fixedSize = 6 + + def __init__(self, annotation_item, index, color): + super(GripItem, self).__init__() + self.m_annotation_item = annotation_item + self.hovering = False + self.m_index = index + color.setAlphaF(1) + self.color = color + + self.updateSize() + self.setPath(self.circle) + self.setBrush(self.color) + self.setPen(QtGui.QPen(self.color, 1)) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsMovable, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemSendsGeometryChanges, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, True) + self.setAcceptHoverEvents(True) + self.setZValue(12) + self.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) + + def setColor(self, color): + self.setBrush(color) + self.setPen(QtGui.QPen(color, 1)) + self.color = color + + @property + def size(self): + if not self.scene(): + return 2 + else: + return GripItem.fixedSize / self.scene().scale + + def updateSize(self, size=2): + size = self.size + self.circle = QtGui.QPainterPath() + self.circle.addEllipse(QtCore.QRectF(-size, -size, size * 2, size * 2)) + self.square = QtGui.QPainterPath() + self.square.addRect(QtCore.QRectF(-size, -size, size * 2, size * 2)) + self.setPath(self.square if self.hovering else self.circle) + + def hoverEnterEvent(self, ev): + self.setPath(self.square) + self.setBrush(QtGui.QColor(0, 0, 0, 0)) + self.m_annotation_item.item_hovering = True + self.hovring = True + super(GripItem, self).hoverEnterEvent(ev) + + def hoverLeaveEvent(self, ev): + self.setPath(self.circle) + self.setBrush(self.color) + self.m_annotation_item.item_hovering = False + self.hovring = False + 
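        # Note: item_hovering set here feeds AnnotationScene.hovering; while any
        # grip, line or polygon reports hovering, a mouse press is handled as
        # polygon editing instead of being emitted as a new segmentation click.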
super(GripItem, self).hoverLeaveEvent(ev) + + def mouseReleaseEvent(self, ev): + self.setSelected(False) + super(GripItem, self).mouseReleaseEvent(ev) + + def itemChange(self, change, value): + if change == QtWidgets.QGraphicsItem.ItemPositionChange and self.isEnabled(): + self.m_annotation_item.movePoint(self.m_index, value) + return super(GripItem, self).itemChange(change, value) + + def shape(self): + s = super(GripItem, self).shape().boundingRect().x() * 3 + path = QtGui.QPainterPath() + path.addRect(QtCore.QRectF(-s, -s, 2 * s, 2 * s)) + return path + + def mouseDoubleClickEvent(self, ev): + self.m_annotation_item.removeFocusPoint() diff --git a/EISeg/eiseg/widget/line.py b/EISeg/eiseg/widget/line.py new file mode 100644 index 0000000000..f37825d36c --- /dev/null +++ b/EISeg/eiseg/widget/line.py @@ -0,0 +1,60 @@ +from qtpy import QtWidgets, QtGui, QtCore + + +class LineItem(QtWidgets.QGraphicsLineItem): + fixedWidth = 1 + + def __init__(self, annotation_item, idx, color): + super(LineItem, self).__init__() + self.polygon_item = annotation_item + self.idx = idx + self.color = color + self.setPen(QtGui.QPen(color, self.width)) + + self.setZValue(11) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, True) + self.setAcceptHoverEvents(True) + + def setColor(self, color): + self.setPen(QtGui.QPen(color, self.width)) + self.color = color + + @property + def width(self): + if not self.scene(): + width = 1 + else: + width = LineItem.fixedWidth / self.scene().scale + return width + + def updateWidth(self): + self.setPen(QtGui.QPen(self.color, self.width)) + + def hoverEnterEvent(self, ev): + self.polygon_item.line_hovering = True + self.setPen(QtGui.QPen(self.color, self.width * 3)) + super(LineItem, self).hoverEnterEvent(ev) + + def hoverLeaveEvent(self, ev): + self.polygon_item.line_hovering = False + self.setPen(QtGui.QPen(self.color, self.width)) + super(LineItem, self).hoverLeaveEvent(ev) + + def mouseDoubleClickEvent(self, ev): + self.setPen(QtGui.QPen(self.color, self.width)) + self.polygon_item.addPointMiddle(self.idx, ev.pos()) + super(LineItem, self).mouseDoubleClickEvent(ev) + + def shape(self): + path = QtGui.QPainterPath() + path.addPolygon(self.boundingPolygon()) + return path + + def boundingPolygon(self): + w = self.width * 10 + w = max(w, 3) + p = QtCore.QPointF(w, 0) + s, e = self.line().p1(), self.line().p2() + poly = QtGui.QPolygonF([s - p, s + p, e + p, e - p]) + return poly diff --git a/EISeg/eiseg/widget/polygon.py b/EISeg/eiseg/widget/polygon.py new file mode 100644 index 0000000000..8e1bd5ed29 --- /dev/null +++ b/EISeg/eiseg/widget/polygon.py @@ -0,0 +1,271 @@ +from qtpy import QtWidgets, QtGui, QtCore + +from . 
import GripItem, LineItem, BBoxAnnotation + + +class PolygonAnnotation(QtWidgets.QGraphicsPolygonItem): + def __init__( + self, + labelIndex, + shape, + delPolygon, + insideColor=[255, 0, 0], + borderColor=[0, 255, 0], + opacity=0.5, + cocoIndex=None, + parent=None, + ): + super(PolygonAnnotation, self).__init__(parent) + self.points = [] + self.m_items = [] + self.m_lines = [] + self.coco_id = cocoIndex + self.height, self.width = shape[:2] + self.delPolygon = delPolygon + + self.labelIndex = labelIndex + self.item_hovering = False + self.polygon_hovering = False + self.anning = False # 是否标注模式 + self.line_hovering = False + self.noMove = False + self.last_focse = False # 之前是不是焦点在 + + self.setZValue(10) + self.opacity = opacity + i = insideColor + self.insideColor = QtGui.QColor(i[0], i[1], i[2]) + self.insideColor.setAlphaF(opacity) + self.halfInsideColor = QtGui.QColor(i[0], i[1], i[2]) + self.halfInsideColor.setAlphaF(opacity / 2) + self.setBrush(self.halfInsideColor) + b = borderColor + self.borderColor = QtGui.QColor(b[0], b[1], b[2]) + self.borderColor.setAlphaF(0.8) + self.setPen(QtGui.QPen(self.borderColor)) + self.setAcceptHoverEvents(True) + + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsMovable, False) + self.setFlag(QtWidgets.QGraphicsItem.ItemSendsGeometryChanges, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, True) + + self.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) + + # persistent this bbox instance and update when needed + self.bbox = BBoxAnnotation(labelIndex, self, cocoIndex, self) + self.bbox.setParentItem(self) + + @property + def scnenePoints(self): + points = [] + for p in self.points: + p = self.mapToScene(p) + points.append([p.x(), p.y()]) + return points + + def setAnning(self, isAnning=True): + if isAnning: + self.setAcceptHoverEvents(False) + self.last_focse = self.polygon_hovering + self.polygon_hovering = False + self.anning = True + self.setBrush(QtGui.QBrush(QtCore.Qt.NoBrush)) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, False) + # self.setFlag(QtWidgets.QGraphicsItem.ItemIsMovable, False) + self.setFlag(QtWidgets.QGraphicsItem.ItemSendsGeometryChanges, False) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, False) + self.setCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor)) + else: + self.setAcceptHoverEvents(True) + self.anning = False + if self.last_focse: + self.polygon_hovering = True + self.setBrush(self.insideColor) + else: + self.setBrush(self.halfInsideColor) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsSelectable, True) + # self.setFlag(QtWidgets.QGraphicsItem.ItemIsMovable, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemSendsGeometryChanges, True) + self.setFlag(QtWidgets.QGraphicsItem.ItemIsFocusable, True) + self.setCursor(QtGui.QCursor(QtCore.Qt.PointingHandCursor)) + + def addPointMiddle(self, lineIdx, point): + gripItem = GripItem(self, lineIdx + 1, self.borderColor) + gripItem.setEnabled(False) + gripItem.setPos(point) + self.scene().addItem(gripItem) + gripItem.updateSize() + gripItem.setEnabled(True) + for grip in self.m_items[lineIdx + 1 :]: + grip.m_index += 1 + self.m_items.insert(lineIdx + 1, gripItem) + self.points.insert(lineIdx + 1, self.mapFromScene(point)) + self.setPolygon(QtGui.QPolygonF(self.points)) + self.bbox.update() + for line in self.m_lines[lineIdx + 1 :]: + line.idx += 1 + line = QtCore.QLineF(self.mapToScene(self.points[lineIdx]), point) + self.m_lines[lineIdx].setLine(line) + lineItem = LineItem(self, lineIdx + 1, 
self.borderColor) + line = QtCore.QLineF( + point, + self.mapToScene(self.points[(lineIdx + 2) % len(self)]), + ) + lineItem.setLine(line) + self.m_lines.insert(lineIdx + 1, lineItem) + self.scene().addItem(lineItem) + + def addPointLast(self, p): + grip = GripItem(self, len(self), self.borderColor) + self.scene().addItem(grip) + self.m_items.append(grip) + grip.updateSize() + grip.setPos(p) + if len(self) == 0: + line = LineItem(self, len(self), self.borderColor) + self.scene().addItem(line) + self.m_lines.append(line) + line.setLine(QtCore.QLineF(p, p)) + else: + self.m_lines[-1].setLine(QtCore.QLineF(self.points[-1], p)) + line = LineItem(self, len(self), self.borderColor) + self.scene().addItem(line) + self.m_lines.append(line) + line.setLine(QtCore.QLineF(p, self.points[0])) + + self.points.append(p) + self.setPolygon(QtGui.QPolygonF(self.points)) + self.bbox.update() + + def remove(self): + for grip in self.m_items: + self.scene().removeItem(grip) + for line in self.m_lines: + self.scene().removeItem(line) + while len(self.m_items) != 0: + self.m_items.pop() + while len(self.m_lines) != 0: + self.m_lines.pop() + self.scene().polygon_items.remove(self) + self.scene().removeItem(self) + self.bbox.remove_from_scene() + del self.bbox + del self + + def removeFocusPoint(self): + focusIdx = None + for idx, item in enumerate(self.m_items): + if item.hasFocus(): + focusIdx = idx + break + if focusIdx is not None: + if len(self) <= 3: + self.delPolygon(self) # 调用app的删除多边形,为了同时删除coco标签 + return + del self.points[focusIdx] + self.setPolygon(QtGui.QPolygonF(self.points)) + self.bbox.update() + self.scene().removeItem(self.m_items[focusIdx]) + del self.m_items[focusIdx] + for grip in self.m_items[focusIdx:]: + grip.m_index -= 1 + + self.scene().removeItem(self.m_lines[focusIdx]) + del self.m_lines[focusIdx] + line = QtCore.QLineF( + self.mapToScene(self.points[(focusIdx - 1) % len(self)]), + self.mapToScene(self.points[focusIdx % len(self)]), + ) + # print((focusIdx - 1) % len(self), len(self.m_lines), len(self)) + self.m_lines[(focusIdx - 1) % len(self)].setLine(line) + for line in self.m_lines[focusIdx:]: + line.idx -= 1 + + def removeLastPoint(self): + # TODO: 创建的时候用到,需要删line + if len(self.points) == 0: + self.points.pop() + self.setPolygon(QtGui.QPolygonF(self.points)) + self.bbox.update() + it = self.m_items.pop() + self.scene().removeItem(it) + del it + + def movePoint(self, i, p): + # print("Move point", i, p) + if 0 <= i < len(self.points): + p = self.mapFromScene(p) + self.points[i] = p + self.setPolygon(QtGui.QPolygonF(self.points)) + self.bbox.update() + self.moveLine(i) + + def moveLine(self, i): + # print("Moving line: ", i, self.noMove) + if self.noMove: + return + points = self.points + # line[i] + line = QtCore.QLineF( + self.mapToScene(points[i]), self.mapToScene(points[(i + 1) % len(self)]) + ) + self.m_lines[i].setLine(line) + # line[i-1] + line = QtCore.QLineF( + self.mapToScene(points[(i - 1) % len(self)]), self.mapToScene(points[i]) + ) + # print((i - 1) % len(self), len(self.m_lines), len(self)) + self.m_lines[(i - 1) % len(self)].setLine(line) + + def move_item(self, i, pos): + if 0 <= i < len(self.m_items): + item = self.m_items[i] + item.setEnabled(False) + item.setPos(pos) + item.setEnabled(True) + self.moveLine(i) + + def itemChange(self, change, value): + if change == QtWidgets.QGraphicsItem.ItemPositionHasChanged: + for i, point in enumerate(self.points): + self.move_item(i, self.mapToScene(point)) + return super(PolygonAnnotation, self).itemChange(change, value) + + 
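The add/move/remove methods above are what turn a finished interactive mask into an editable polygon. A minimal glue sketch is shown below; it is hypothetical code (the real wiring lives in `app.py`, which is not part of this hunk), and the helper name `mask_to_polygon_items`, the `scene.scale` default and the `delPolygon` callback argument are assumptions made only for illustration. `get_polygon` is the contour helper from `eiseg/util/polygon.py` earlier in this diff.

```python
# Hypothetical glue code, for illustration only -- not part of EISeg.
import numpy as np
from qtpy import QtCore

from util.polygon import get_polygon       # defined earlier in this diff
from widget import PolygonAnnotation


def mask_to_polygon_items(mask, scene, labelIndex, delPolygon):
    """Turn a binary mask into editable PolygonAnnotation items on a scene."""
    scene.scale = getattr(scene, "scale", 1.0)  # grips size themselves by zoom
    items = []
    for contour in get_polygon((mask > 0).astype(np.uint8) * 255) or []:
        poly = PolygonAnnotation(labelIndex, mask.shape, delPolygon)
        scene.addItem(poly)                 # must be in the scene before points
        scene.polygon_items.append(poly)
        for x, y in contour:                # contour points are (x, y) pixels
            poly.addPointLast(QtCore.QPointF(float(x), float(y)))
        items.append(poly)
    return items
```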
def hoverEnterEvent(self, ev): + self.polygon_hovering = True + self.setBrush(self.insideColor) + super(PolygonAnnotation, self).hoverEnterEvent(ev) + + def hoverLeaveEvent(self, ev): + self.polygon_hovering = False + if not self.hasFocus(): + self.setBrush(self.halfInsideColor) + super(PolygonAnnotation, self).hoverLeaveEvent(ev) + + def focusInEvent(self, ev): + if not self.anning: + self.setBrush(self.insideColor) + + def focusOutEvent(self, ev): + if not self.polygon_hovering and not self.anning: + self.setBrush(self.halfInsideColor) + + def setColor(self, insideColor, borderColor): + i = insideColor + self.insideColor = QtGui.QColor(i[0], i[1], i[2]) + self.insideColor.setAlphaF(self.opacity) + self.halfInsideColor = QtGui.QColor(i[0], i[1], i[2]) + self.halfInsideColor.setAlphaF(self.opacity / 2) + self.setBrush(self.halfInsideColor) + b = borderColor + self.borderColor = QtGui.QColor(b[0], b[1], b[2]) + self.borderColor.setAlphaF(0.8) + self.setPen(QtGui.QPen(self.borderColor)) + for grip in self.m_items: + grip.setColor(self.borderColor) + for line in self.m_lines: + line.setColor(self.borderColor) + + def __len__(self): + return len(self.points) diff --git a/EISeg/eiseg/widget/scene.py b/EISeg/eiseg/widget/scene.py new file mode 100644 index 0000000000..92ca6a970e --- /dev/null +++ b/EISeg/eiseg/widget/scene.py @@ -0,0 +1,69 @@ +from qtpy import QtWidgets, QtCore +from qtpy.QtCore import Qt + + +class AnnotationScene(QtWidgets.QGraphicsScene): + clickRequest = QtCore.Signal(int, int, bool) + + def __init__(self, parent=None): + super(AnnotationScene, self).__init__(parent) + self.creating = False + self.polygon_items = [] + + def updatePolygonSize(self): + for poly in self.polygon_items: + for grip in poly.m_items: + grip.updateSize() + for line in poly.m_lines: + line.updateWidth() + + def setCreating(self, creating=True): + self.creating = creating + + def mousePressEvent(self, ev): + pos = ev.scenePos() + if not self.creating and not self.hovering: + if ev.buttons() in [Qt.LeftButton, Qt.RightButton]: + self.clickRequest.emit( + int(pos.x()), int(pos.y()), ev.buttons() == Qt.LeftButton + ) + elif self.creating: + self.polygon_item.removeLastPoint() + self.polygon_item.addPointLast(ev.scenePos()) + # movable element + self.polygon_item.addPointLast(ev.scenePos()) + super(AnnotationScene, self).mousePressEvent(ev) + + def mouseMoveEvent(self, ev): + if self.creating: + self.polygon_item.movePoint( + # self.polygon_item.number_of_points() - 1, ev.scenePos() + len(self.polygon_item) - 1, + ev.scenePos(), + ) + super(AnnotationScene, self).mouseMoveEvent(ev) + + @property + def item_hovering(self): + for poly in self.polygon_items: + if poly.item_hovering: + return True + return False + + @property + def polygon_hovering(self): + for poly in self.polygon_items: + if poly.polygon_hovering: + return True + return False + + @property + def line_hovering(self): + for poly in self.polygon_items: + if poly.line_hovering: + return True + return False + + @property + def hovering(self): + return self.item_hovering or self.polygon_hovering or self.line_hovering diff --git a/EISeg/eiseg/widget/shortcut.py b/EISeg/eiseg/widget/shortcut.py new file mode 100644 index 0000000000..dc47868d65 --- /dev/null +++ b/EISeg/eiseg/widget/shortcut.py @@ -0,0 +1,138 @@ +import os.path as osp +import math +from functools import partial + +from PyQt5.QtCore import QPoint +from PyQt5.QtWidgets import QDesktopWidget + +from qtpy import QtCore, QtWidgets +from qtpy.QtWidgets import ( + QWidget, + QLabel, + 
QPushButton, + QGridLayout, + QKeySequenceEdit, + QMessageBox, +) +from qtpy.QtGui import QIcon +from qtpy import QtCore +from qtpy.QtCore import Qt + +from util import save_configs + + +class RecordShortcutWindow(QKeySequenceEdit): + def __init__(self, finishCallback, location): + super().__init__() + self.finishCallback = finishCallback + # 隐藏界面 + self.setWindowFlags(Qt.FramelessWindowHint) + self.move(location) + self.show() + self.editingFinished.connect(lambda: finishCallback(self.keySequence())) + + def keyReleaseEvent(self, ev): + self.finishCallback(self.keySequence()) + + +class ShortcutWindow(QWidget): + def __init__(self, actions, pjpath): + super().__init__() + self.tr = partial(QtCore.QCoreApplication.translate, "ShortcutWindow") + self.setWindowTitle(self.tr("编辑快捷键")) + self.setWindowIcon(QIcon(osp.join(pjpath, "resource/Shortcut.png"))) + # self.setFixedSize(self.width(), self.height()) + self.actions = actions + self.recorder = None + self.initUI() + + def initUI(self): + grid = QGridLayout() + self.setLayout(grid) + + actions = self.actions + for idx, action in enumerate(actions): + # 2列英文看不清 + grid.addWidget(QLabel(action.iconText()[1:]), idx // 3, idx % 3 * 3) + shortcut = action.shortcut().toString() + if len(shortcut) == 0: + shortcut = self.tr("无") + button = QPushButton(shortcut) + button.setFixedWidth(150) + button.setFixedHeight(30) + button.clicked.connect(partial(self.recordShortcut, action)) + grid.addWidget( + button, + idx // 3, + idx % 3 * 3 + 1, + ) + + def refreshUi(self): + actions = self.actions + for idx, action in enumerate(actions): + shortcut = action.shortcut().toString() + if len(shortcut) == 0: + shortcut = self.tr("无") + self.layout().itemAtPosition( + idx // 3, + idx % 3 * 3 + 1, + ).widget().setText(shortcut) + + def recordShortcut(self, action): + # 打开快捷键设置的窗口时,如果之前的还在就先关闭 + if self.recorder is not None: + self.recorder.close() + rect = self.geometry() + x = rect.x() + y = rect.y() + rect.height() + self.recorder = RecordShortcutWindow(self.setShortcut, QPoint(x, y)) + self.currentAction = action + + def setShortcut(self, key): + self.recorder.close() + + for a in self.actions: + if a.shortcut() == key: + key = key.toString() + msg = QMessageBox() + msg.setIcon(QMessageBox.Warning) + msg.setWindowTitle(key + " " + self.tr("快捷键冲突")) + msg.setText( + key + + " " + + self.tr("快捷键已被") + + " " + + a.data() + + " " + + self.tr("使用,请设置其他快捷键或先修改") + + " " + + a.data() + + " " + + self.tr("的快捷键") + ) + msg.setStandardButtons(QMessageBox.Ok) + msg.exec_() + return + key = "" if key.toString() == "Esc" else key # ESC不设置快捷键 + self.currentAction.setShortcut(key) + self.refreshUi() + save_configs(None, None, self.actions) + + def center(self): + qr = self.frameGeometry() + cp = QDesktopWidget().availableGeometry().center() + qr.moveCenter(cp) + self.move(qr.topLeft()) + + # 快捷键设置跟随移动 + def moveEvent(self, event): + p = self.geometry() + x = p.x() + y = p.y() + p.height() + if self.recorder is not None: + self.recorder.move(x, y) + + def closeEvent(self, event): + # 关闭时也退出快捷键设置 + if self.recorder is not None: + self.recorder.close() diff --git a/EISeg/eiseg/widget/table.py b/EISeg/eiseg/widget/table.py new file mode 100644 index 0000000000..cfe99cbc9e --- /dev/null +++ b/EISeg/eiseg/widget/table.py @@ -0,0 +1,60 @@ +from qtpy import QtWidgets +from qtpy.QtCore import Qt + + +class TableWidget(QtWidgets.QTableWidget): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.setDragEnabled(True) + self.setAcceptDrops(True) + 
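        # Drag-and-drop setup: rows can only be moved inside this table
        # (InternalMove). dropEvent below re-inserts the dragged rows at the
        # drop position and removes the originals, keeping their relative order.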
self.viewport().setAcceptDrops(True) + self.setDragDropOverwriteMode(False) + self.setDropIndicatorShown(True) + self.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection) + self.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows) + self.setDragDropMode(QtWidgets.QAbstractItemView.InternalMove) + + def dropEvent(self, event): + if event.source() == self: + rows = set([mi.row() for mi in self.selectedIndexes()]) + targetRow = self.indexAt(event.pos()).row() + rows.discard(targetRow) + rows = sorted(rows) + if not rows: + return + if targetRow == -1: + targetRow = self.rowCount() + for _ in range(len(rows)): + self.insertRow(targetRow) + rowMapping = dict() # Src row to target row. + for idx, row in enumerate(rows): + if row < targetRow: + rowMapping[row] = targetRow + idx + else: + rowMapping[row + len(rows)] = targetRow + idx + colCount = self.columnCount() + for srcRow, tgtRow in sorted(rowMapping.items()): + for col in range(0, colCount): + self.setItem(tgtRow, col, self.takeItem(srcRow, col)) + for row in reversed(sorted(rowMapping.keys())): + self.removeRow(row) + event.accept() + return + + def drop_on(self, event): + index = self.indexAt(event.pos()) + if not index.isValid(): + return self.rowCount() + + return index.row() + 1 if self.is_below(event.pos(), index) else index.row() + + def is_below(self, pos, index): + rect = self.visualRect(index) + margin = 2 + if pos.y() - rect.top() < margin: + return False + elif rect.bottom() - pos.y() < margin: + return True + # noinspection PyTypeChecker + return rect.contains(pos, True) and not ( + int(self.model().flags(index)) & Qt.ItemIsDropEnabled) and pos.y() >= rect.center().y() \ No newline at end of file diff --git a/EISeg/eiseg/widget/view.py b/EISeg/eiseg/widget/view.py new file mode 100644 index 0000000000..3c3bbf6518 --- /dev/null +++ b/EISeg/eiseg/widget/view.py @@ -0,0 +1,57 @@ +from qtpy import QtWidgets, QtCore, QtGui +from qtpy.QtCore import Qt + + +class AnnotationView(QtWidgets.QGraphicsView): + zoomRequest = QtCore.Signal(float) + + def __init__(self, *args): + super(AnnotationView, self).__init__(*args) + self.setRenderHints( + QtGui.QPainter.Antialiasing | QtGui.QPainter.SmoothPixmapTransform + ) + self.setMouseTracking(True) + self.setTransformationAnchor(QtWidgets.QGraphicsView.NoAnchor) + self.setResizeAnchor(QtWidgets.QGraphicsView.NoAnchor) + self.point = QtCore.QPoint(0, 0) + self.middle_click = False + self.zoom_all = 1 + + def wheelEvent(self, ev): + if ev.modifiers() & QtCore.Qt.ControlModifier: + zoom = 1 + ev.angleDelta().y() / 2880 + self.zoom_all *= zoom + oldPos = self.mapToScene(ev.pos()) + if self.zoom_all >= 0.02 and self.zoom_all <= 50: # 限制缩放的倍数 + self.scale(zoom, zoom) + newPos = self.mapToScene(ev.pos()) + delta = newPos - oldPos + self.translate(delta.x(), delta.y()) + ev.ignore() + self.zoomRequest.emit(self.zoom_all) + else: + super(AnnotationView, self).wheelEvent(ev) + + def mouseMoveEvent(self, ev): + if self.middle_click and ( + self.horizontalScrollBar().isVisible() + or self.verticalScrollBar().isVisible() + ): + # 放大到出现滚动条才允许拖动,避免出现抖动 + self._endPos = ev.pos() / self.zoom_all - self._startPos / self.zoom_all + # 这儿不写为先减后除,这样会造成速度不一致 + self.point = self.point + self._endPos + self._startPos = ev.pos() + self.translate(self._endPos.x(), self._endPos.y()) + super(AnnotationView, self).mouseMoveEvent(ev) + + def mousePressEvent(self, ev): + if ev.buttons() == Qt.MiddleButton: + self.middle_click = True + self._startPos = ev.pos() + super(AnnotationView, 
self).mousePressEvent(ev) + + def mouseReleaseEvent(self, ev): + if ev.button() == Qt.MiddleButton: + self.middle_click = False + super(AnnotationView, self).mouseReleaseEvent(ev) diff --git a/EISeg/init.sh b/EISeg/init.sh new file mode 100644 index 0000000000..844b990456 --- /dev/null +++ b/EISeg/init.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +ROOT=`cd "$(dirname ${BASH_SOURCE[0]})" && pwd` + +echo "ROOT : $ROOT" + +export PYTHONPATH=$PYTHONPATH:$ROOT/eiseg diff --git a/contrib/EISeg/requirements.txt b/EISeg/requirements.txt similarity index 86% rename from contrib/EISeg/requirements.txt rename to EISeg/requirements.txt index cfcfe2ec80..ea8156df20 100644 --- a/contrib/EISeg/requirements.txt +++ b/EISeg/requirements.txt @@ -7,4 +7,3 @@ albumentations cython pyyaml wget -qpt == 1.0a6 \ No newline at end of file diff --git a/EISeg/setup.py b/EISeg/setup.py new file mode 100644 index 0000000000..5f20384d9a --- /dev/null +++ b/EISeg/setup.py @@ -0,0 +1,54 @@ +import pathlib +from setuptools import setup, find_packages, Extension + +import numpy as np + +from eiseg import __APPNAME__, __VERSION__ + + +# from Cython.Build import cythonize + +HERE = pathlib.Path(__file__).parent + +README = (HERE / "README.md").read_text(encoding="utf-8") + +with open("requirements.txt") as fin: + REQUIRED_PACKAGES = fin.read() + +ext_modules = [ + Extension( + "pycocotools._mask", + sources=[ + "./eiseg/util/coco/common/maskApi.c", + "./eiseg/util/coco/pycocotools/_mask.pyx", + ], + include_dirs=[np.get_include(), "./eiseg/util/coco/common"], + extra_compile_args=["-Wno-cpp", "-Wno-unused-function", "-std=c99"], + ) +] + +setup( + name=__APPNAME__, + version=__VERSION__, + description="交互式标注软件", + long_description=README, + long_description_content_type="text/markdown", + url="https://github.com/PaddleCV-SIG/EISeg", + author="PaddleCV-SIG", + author_email="linhandev@qq.com", + license="Apache Software License", + classifiers=[ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + ], + packages=find_packages(exclude=("test",)), + # packages=["EISeg"], + include_package_data=True, + install_requires=REQUIRED_PACKAGES, + entry_points={ + "console_scripts": [ + "eiseg=eiseg.run:main", + ] + }, +) diff --git a/EISeg/tool/baidu_translate.py b/EISeg/tool/baidu_translate.py new file mode 100644 index 0000000000..d272ebda13 --- /dev/null +++ b/EISeg/tool/baidu_translate.py @@ -0,0 +1,102 @@ +import json +import random +import hashlib +from urllib import parse +import http.client +from tqdm import tqdm +from collections import defaultdict + +from bs4 import BeautifulSoup as bs + + +class BaiduTranslate: + def __init__(self, fromLang, toLang): + self.url = "/api/trans/vip/translate" + self.appid = "20200311000396156" + self.secretKey = "s6c3ZeYTI9lhrwQVugnM" + self.fromLang = fromLang + self.toLang = toLang + self.salt = random.randint(32768, 65536) + + def trans(self, text): + sign = self.appid + text + str(self.salt) + self.secretKey + md = hashlib.md5() + md.update(sign.encode(encoding="utf-8")) + sign = md.hexdigest() + myurl = ( + self.url + + "?appid=" + + self.appid + + "&q=" + + parse.quote(text) + + "&from=" + + self.fromLang + + "&to=" + + self.toLang + + "&salt=" + + str(self.salt) + + "&sign=" + + sign + ) + try: + httpClient = http.client.HTTPConnection("api.fanyi.baidu.com") + httpClient.request("GET", myurl) + response = httpClient.getresponse() + html = response.read().decode("utf-8") + html = json.loads(html) + dst = 
html["trans_result"][0]["dst"] + return True, dst + except Exception as e: + return False, e + +def read_ts(ts_path): + xml = open(ts_path, "r", encoding="utf-8").read() + xml = bs(xml, "xml") + return xml + + +pre_ts_path = "tool/ts/English.ts" # Russia +ts_path = "tool/ts/out.ts" +pre_xml = read_ts(pre_ts_path) +xml = read_ts(ts_path) +pre_messages = pre_xml.find_all("message") +messages = xml.find_all("message") +bd_trans = BaiduTranslate("zh", "en") # ru +trans = bd_trans.trans + +translated = 0 +failed = 0 +for msg in messages: + type = msg.translation.get("type", None) + source = msg.source.string + trans = msg.translation.string + if type == "unfinished" and trans is None and source is not None: + in_pre = False + for pmsg in pre_messages: + if pmsg.source.string == source: + try: + msg.translation.string = pmsg.translation.string + translated += 1 + print( + f"{translated + failed} / {len(messages)}:{source} \t {msg.translation.string}" + ) + in_pre = True + except: + pass + break + if in_pre is False: + res = bd_trans.trans(source) + if res[0]: + msg.translation.string = res[1] + translated += 1 + else: + failed += 1 + print( + f"{translated + failed} / {len(messages)}:{source} \t {msg.translation.string}" + ) + +for name in xml.find_all("name"): + name.string = "APP_EISeg" + +print(f"Totally {len(messages)} , translated {translated}, failed {failed}") +open(ts_path, "w").write(str(xml)) \ No newline at end of file diff --git a/contrib/EISeg/tool/pypi.sh b/EISeg/tool/pypi.sh similarity index 100% rename from contrib/EISeg/tool/pypi.sh rename to EISeg/tool/pypi.sh diff --git a/EISeg/tool/translate.pro b/EISeg/tool/translate.pro new file mode 100644 index 0000000000..6eccfb1f74 --- /dev/null +++ b/EISeg/tool/translate.pro @@ -0,0 +1,3 @@ +CODECFORTR = UTF-8 +SOURCES = ../eiseg/app.py; ../eiseg/ui.py; ../eiseg/widget/shortcut.py +TRANSLATIONS = ./ts/out.ts diff --git a/EISeg/tool/ts/English.ts b/EISeg/tool/ts/English.ts new file mode 100644 index 0000000000..74b9ecfe68 --- /dev/null +++ b/EISeg/tool/ts/English.ts @@ -0,0 +1,683 @@ + + + + + APP_EISeg + + + &编辑快捷键 + &Edit Shortcuts + + + + 编辑软件快捷键 + Edit software shortcuts + + + + &上一张 + &Prev Image + + + + 翻到上一张图片 + Filp to previous image + + + + &下一张 + &Next Image + + + + 翻到下一张图片 + Flip to the next image + + + + &打开图像 + &Open Image + + + + 打开一张图像进行标注 + Open an image for annotation + + + + &打开文件夹 + &Open Dir + + + + 打开一个文件夹下所有的图像进行标注 + Open all images in a folder for annotation + + + + &改变标签保存路径 + &Change Output Dir + + + + 改变标签保存的文件夹路径 + Change the folder where labels are saved + + + + &加载模型参数 + &Load Model Parameters + + + + 加载一个模型参数 + Load a model parameter + + + + &完成当前目标 + &Finish Current Target + + + + 完成当前目标的标注 + Finish labeling the current object + + + + &清除所有标注 + &Clear All Labels + + + + 清除所有标注信息 + Clear all labels in the image + + + + &撤销 + &Undo + + + + 撤销一次点击 + Undo one click + + + + &重做 + &Redo + + + + 重做一次点击 + Redo one click + + + + &保存 + &Save + + + + 保存图像标签 + Save image label + + + + &另存为 + &Save as + + + + 指定标签保存路径 + Specify label save path + + + + &自动保存 + &Auto Save + + + + 翻页同时自动保存 + Save automatically while turning image + + + + &删除多边形 + &Delete Polygon + + + + 删除当前选中的多边形 + Deletes the currently selected polygon + + + + &保留最大连通块 + &Filter LCC + + + + 保留最大的连通块 + Keep the largest connected component only + + + + &标签和图像使用相同拓展名 + &Use same extension name + + + + 标签和图像使用相同拓展名,用于图像中有文件名相同,拓展名不同的情况 + The label and image use the same extension name, which is used when the file name in the image is the same and the 
extension name is different + + + + &伪彩色保存 + &Pseudo Color Format + + + + 保存为伪彩色图像 + Save label in pseudo color format + + + + &灰度保存 + &Grayscale Format + + + + 保存为灰度图像,像素的灰度为对应类型的标签 + Save label in grayscale format, the value of each pixel is the id for the label category of the pixel + + + + &JSON保存 + &JSON Format + + + + 保存为JSON格式 + Save polygon information in JSON format + + + + &COCO保存 + &Coco Format + + + + 保存为COCO格式 + Save polygon information in coco format + + + + &关闭 + &Close + + + + 关闭当前图像 + Close current image + + + + &抠图保存 + &Save Matting + + + + 只保留前景,背景设置为背景色 + Only keep foreground pixels, set all background pixels to background color + + + + &设置抠图背景色 + &Set matting background color + + + + 抠图后背景像素的颜色 + The color to use for all background pixels + + + + &退出 + &Exit + + + + 退出软件 + Exit software + + + + &保存标签列表 + &Save Label List + + + + 将标签保存成标签配置文件 + Save the labels to disk as a file which can be loaded back + + + + &加载标签列表 + &Load Label List + + + + 从标签配置文件中加载标签 + Load labels from label profile + + + + &清空标签列表 + &Clear Label List + + + + 清空所有的标签 + Clear all labels + + + + &清除标注记录 + &Clear Annotation Records + + + + 清除近期标注记录 + Clear recent annotation records + + + + &模型选择 + &Model Selection + + + + 模型选择 + Model selection + + + + &数据列表 + &Image List + + + + 数据列表 + Image List + + + + &标签列表 + &Label List + + + + 标签列表 + Label List + + + + &分割设置 + &Segmentation Setting + + + + 分割设置 + Segmentation Setting + + + + 近期文件 + Recent documents + + + + 近期模型及参数 + Recent models and parameters + + + + 语言 + Language + + + + 文件 + File + + + + 标注 + Annotation + + + + 功能 + Functions + + + + 显示 + Display + + + + 帮助 + Help + + + +APP_EISeg + + + 切换语言 + Changing language + + + + 切换语言需要重启软件才能生效 + Changing language only takes effect after restarting the app + + + + 无近期文件 + No recent documents + + + + 已清除最近打开文件 + Recently opened files have been cleared + + + + 无近期模型记录 + No recent model parameters + + + + 轻量级模型 + Lightweight model + + + + 高精度模型 + High precision model + + + + 选择模型结构 + Select model structure + + + + 尚未选择模型结构,请在右侧下拉菜单进行选择! + No model structure has been selected, please select it from the drop-down menu on the right! + + + + Paddle模型权重文件(*.pdparams) + Paddle Model Parameter File(*.pdparams) + + + + 选择模型参数 + Select model parameters + + + + 没有最近使用模型信息,请加载模型 + There is no recently used model information, please load the model + + + + 标签配置文件 + Label profile + + + + 选择标签配置文件路径 + Select label profile path + + + + 没有需要保存的标签 + There are no labels to save + + + + 请先添加标签之后再进行保存! + Please add a label before saving! + + + + 保存标签配置文件 + Save label profile + + + + 选择保存标签配置文件路径 + Select the path to save the label profile + + + + 清空标签列表? + Clear label list? + + + + 请确认是否要清空标签列表 + Please confirm you want to clear the label list + + + + 确认删除? + Confirm deletion? + + + + 确认要删除当前选中多边形标注? + Are you sure you want to delete the currently selected polygon dimension? + + + + 选择待标注图片 + Select the image to be labeled + + + + 选择待标注图片文件夹 + Select the image folder to label + + + + 没有后一张图片 + This is the last image, can't turn to the next image + + + + 没有前一张图片 + This is the first image, can't turn to the previous image + + + + 模型未加载 + Model not loaded + + + + 尚未加载模型,请先加载模型! + The model has not been loaded, please load the model first! + + + + 完成最后一个目标? + Finish the last goal? + + + + 是否完成最后一个目标的标注,不完成不会进行保存。 + Whether to complete the annotation of the last target. If not, it will not be saved. + + + + 保存标签? + Save label? + + + + 标签尚未保存,是否保存标签 + The label has not been saved. 
Do you want to save the label + + + + 保存标签文件路径 + Save label file path + + + + 选择标签文件保存路径 + Select the path to save the label file + + + + 标签成功保存至 + Label successfully saved to + + + + 选择标签保存路径 + Select the folder to save labels + + + + 未选择模型 + Model not selected + + + + 尚未选择模型,请先在右上角选择模型 + model not selected. Please select the model in the upper right corner first + + + + 未设置参数 + Parameters not set + + + + 尚未设置参数,请先在右上角设置参数 + Parameters not set. Please set the parameters in the upper right corner first + + + + 未选择当前标签 + The current label is not selected + + + + 请先在标签列表中单击点选标签 + Please click the label in the label list first + + + + 轻量级模型 + Lightweight model + + + + 高精度模型 + High precision model + + + + 加载网络参数 + Load Model Parameter + + + + 模型选择 + Model selection + + + + 保存 + Save + + + + 数据列表 + Image List + + + + 添加标签 + Add Label + + + + 标签列表 + Label List + + + + 分割阈值: + Segmentation Threshold: + + + + 标签透明度: + Label Transparency: + + + + 点击可视化半径: + Click Visualization Radius: + + + + 分割设置 + Segmentation Setting + + + + ShortcutWindow + + + 编辑快捷键 + Edit Keyboard Shortcuts + + + + 无 + Na + + + + 快捷键冲突 + Shortcut key conflict + + + + 快捷键已被 + shortcut has been used by + + + + 使用,请设置其他快捷键或先修改 + . Please set another key sequence or modify the keyboard shotcut of + + + + 的快捷键 + first! + + + \ No newline at end of file diff --git a/contrib/EISeg/tool/update_md5.py b/EISeg/tool/update_md5.py similarity index 100% rename from contrib/EISeg/tool/update_md5.py rename to EISeg/tool/update_md5.py diff --git a/README.md b/README.md index 87a92baa4f..a40b1d43a0 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,41 @@ English | [简体中文](README_CN.md) # PaddleSeg -[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleSeg.svg?branch=master)](https://travis-ci.org/PaddlePaddle/PaddleSeg) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases) ![python version](https://img.shields.io/badge/python-3.6+-orange.svg) ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) +## PaddleSeg has released the new version including the following features: - *[2021-06-19] PaddleSeg Team won the AutoNUE 2021 Challenge: Semantic Segmentation Track in CVPR 2021! Technical Report can be found [here](https://bj.bcebos.com/paddleseg/docs/autonue21_presentation_PaddleSeg.pdf). Code will be coming soon.* +* Our team won the AutoNUE@CVPR 2021 challenge, where the technical [report](https://bj.bcebos.com/paddleseg/docs/autonue21_presentation_PaddleSeg.pdf) and [source code](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/AutoNUE) are available. +* We released an efficient interactive annotation tool for image segmentation, named [EISeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/EISeg). +* We introduced [Panoptic-DeepLab](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PanopticDeepLab), which is a proposal-free algorithm for panoptic segmentation. +* We provided an ultra-lightweight [portrait segmentation](./contrib/PP-HumanSeg) solution for the mobile devices and even the web -![demo](./docs/images/cityscapes.gif) +## PaddleSeg Introduction Welcome to PaddleSeg! 
PaddleSeg is an end-to-end image segmentation development kit developed based on [PaddlePaddle](https://www.paddlepaddle.org.cn), which covers a large number of high-quality segmentation models in different directions such as *high-performance* and *lightweight*. With the help of modular design, we provide two application methods: *Configuration Drive* and *API Calling*. So one can conveniently complete the entire image segmentation application from training to deployment through configuration calls or API calls. +* ### PaddleSeg provides four image segmentation capabilities: semantic segmentation, interactive segmentation, panoptic segmentation and Matting. + +
+ +
+ + +--------------- + + * ### PaddleSeg is widely used in autonomous driving, medical, quality inspection, inspection, entertainment and other scenarios. + +
+ +
+ + +--------------- + + + ## Core Features * **High Performance Model**: Based on the high-performance backbone trained by Baidu's self-developed [semi-supervised label knowledge distillation scheme (SSLD)](https://paddleclas.readthedocs.io/zh_CN/latest/advanced_tutorials/distillation/distillation.html#ssld), combined with the state of the art segmentation technology, we provides 50+ high-quality pre-training models, which are better than other open source implementations. @@ -27,38 +50,19 @@ Welcome to PaddleSeg! PaddleSeg is an end-to-end image segmentation development ## Technical Communication * If you find any problems or have a suggestion with PaddleSeg, please send us issues through [GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues). -* Welcome to Join PaddleSeg WeChat Group (left) and QQ Group (right) +* Welcome to Join PaddleSeg QQ Group +
+ +
+ +## Model Instraction + +[Model Zoo](./configs/) +
- +
-## Model Zoo - -|Model\Backbone|ResNet50|ResNet101|HRNetw18|HRNetw48| -|-|-|-|-|-| -|[ANN](./configs/ann)|✔|✔||| -|[BiSeNetv2](./configs/bisenet)|-|-|-|-| -|[DANet](./configs/danet)|✔|✔||| -|[Deeplabv3](./configs/deeplabv3)|✔|✔||| -|[Deeplabv3P](./configs/deeplabv3p)|✔|✔||| -|[Fast-SCNN](./configs/fastscnn)|-|-|-|-| -|[FCN](./configs/fcn)|||✔|✔| -|[GCNet](./configs/gcnet)|✔|✔||| -|[GSCNN](./configs/gscnn)|✔|✔||| -|[HarDNet](./configs/hardnet)|-|-|-|-| -|[OCRNet](./configs/ocrnet/)|||✔|✔| -|[PSPNet](./configs/pspnet)|✔|✔||| -|[U-Net](./configs/unet)|-|-|-|-| -|[U2-Net](./configs/u2net)|-|-|-|-| -|[Att U-Net](./configs/attention_unet)|-|-|-|-| -|[U-Net++](./configs/unet_plusplus)|-|-|-|-| -|[U-Net3+](./configs/unet_3plus)|-|-|-|-| -|[DecoupledSegNet](./configs/decoupled_segnet)|✔|✔||| -|[EMANet](./configs/emanet)|✔|✔|-|-| -|[ISANet](./configs/isanet)|✔|✔|-|-| -|[DNLNet](./configs/dnlnet)|✔|✔|-|-| -|[SFNet](./configs/sfnet)|✔|-|-|-| -|[PPSegLite](./configs/ppseg_lite)|-|-|-|-| ## Dataset @@ -71,17 +75,19 @@ Welcome to PaddleSeg! PaddleSeg is an end-to-end image segmentation development ## Tutorials * [Installation](./docs/install.md) -* [Get Started](./docs/quick_start.md) -* Data Processing - * [Data Format Description](./docs/data/marker/marker_c.md) - * [Data Annotation and Transform](./docs/data/transform/transform_c.md) +* [Get Started](./docs/whole_process.md) +* Prepare Datasets + * [Preparation of Annotation Data](./docs/data/marker/marker.md) + * [Annotating Tutorial](./docs/data/transform/transform.md) * [Custom Dataset](./docs/data/custom/data_prepare.md) -* Design Idea of PaddleSeg +* Custom Software Development of PaddleSeg * [Detailed Configuration File](./docs/design/use/use.md) * [Create Your Own Model](./docs/design/create/add_new_model.md) * [Model Training](/docs/train/train.md) * [Model Evaluation](./docs/evaluation/evaluate/evaluate.md) +* [Prediction](./docs/predict/predict.md) +* [Model Export](./docs/export/export/model_export.md) * Model Deploy * [Export Model](./docs/model_export.md) @@ -93,15 +99,14 @@ Welcome to PaddleSeg! PaddleSeg is an end-to-end image segmentation development * [Benchmark](./docs/deployment/inference/infer_benchmark.md) * [Export ONNX Model](./docs/model_export_onnx.md) * Model Compression - * [Distillation](./docs/slim/distill/distill.md) * [Quantization](./docs/slim/quant/quant.md) * [Prune](./docs/slim/prune/prune.md) * API Tutorial - * [API Documention](./docs/apis) + * [API Documention](./docs/apis/README.md) * [API Application](./docs/api_example.md) * Description of Important Modules * [Data Augmentation](./docs/module/data/data.md) - * [Loss Description](./docs/module/loss/lovasz_loss.md) + * [Loss Description](./docs/module/loss/losses_en.md) * [Tricks](./docs/module/tricks/tricks.md) * Description of Classical Models * [DeeplabV3](./docs/models/deeplabv3.md) @@ -147,9 +152,9 @@ python train.py --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml ## Practical Cases -* [HumanSeg](./contrib/HumanSeg) +* [PP-HumanSeg](./contrib/PP-HumanSeg) * [Cityscapes SOTA](./contrib/CityscapesSOTA) -* [PanopticSegmentation](./contrib/PanopticDeepLab) +* [Panoptic Segmentation](./contrib/PanopticDeepLab) ## Feedbacks and Contact * The dynamic version is still under development, if you find any issue or have an idea on new features, please don't hesitate to contact us via [GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues). 
diff --git a/README_CN.md b/README_CN.md index 34176a0b9f..17107be3e0 100644 --- a/README_CN.md +++ b/README_CN.md @@ -2,44 +2,41 @@ # PaddleSeg -[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleSeg.svg?branch=master)](https://travis-ci.org/PaddlePaddle/PaddleSeg) +[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleSeg.svg?branch=release/2.1)](https://travis-ci.org/PaddlePaddle/PaddleSeg) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases) ![python version](https://img.shields.io/badge/python-3.6+-orange.svg) ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) - *[2021-06-19] PaddleSeg团队获得CVPR2021 AutoNUE语义分割赛道冠军! 已发布[演讲报告](https://bj.bcebos.com/paddleseg/docs/autonue21_presentation_PaddleSeg.pdf)。详细的技术报告和源码即将公布。* -![demo](./docs/images/cityscapes.gif) +## PaddleSeg重磅发布2.2版本,欢迎体验 +* PaddleSeg团队在CVPR2021 AutoNUE语义分割赛道中获得冠军! 已发布[演讲报告](https://bj.bcebos.com/paddleseg/docs/autonue21_presentation_PaddleSeg.pdf)和[源代码](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/AutoNUE)。 +* 发布了交互式分割的智能标注工具 [EISeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/EISeg)。极大的提升了标注效率; +* 开源了全景分割算法[Panoptic-DeepLab](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PanopticDeepLab)丰富了模型种类; +* 全新升级了[人像分割](./contrib/PP-HumanSeg)功能,提供了web端超轻量模型部署方案 + +## PaddleSeg介绍 PaddleSeg是基于飞桨[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的端到端图像分割开发套件,涵盖了**高精度**和**轻量级**等不同方向的大量高质量分割模型。通过模块化的设计,提供了**配置化驱动**和**API调用**两种应用方式,帮助开发者更便捷地完成从训练到部署的全流程图像分割应用。 -## 重要活动提醒 +* ### PaddleSeg提供了语义分割、交互式分割、全景分割、Matting四大图像分割能力。 -✨直播课预告--全球冠军带你实现产业级图像分割✨ +
+ +
-* 直播链接:http://live.bilibili.com/21689802 -* Day① 7.13 20:15-21:30 - * 主题:顶会冠军图像分割算法深度解密 - * 语义分割全系列算法讲解 - * CVPR冠军解析自动驾驶场景理解方案 - * 揭秘百度全新Transformer算法 +--------------- -* Day② 7.14 20:15-21:30 - * 主题:高精度人像分割算法及应用 - * 实时人像分割技术难点及实现 - * Web端超轻量模型设计方案 - * 精细化分割Matting技术详解 + * ### PaddleSeg被广泛地应用在自动驾驶、医疗、质检、巡检、娱乐等场景。 -* Day③ 7.15 20:15-21:30 - * 主题:交互式分割及破圈应用 - * 交互式分割算法及优化技巧 - * 高效智能标注功能的实现 - * 交互式分割的产业创新应用 +
+ +
+---------------- ## 特性 @@ -49,58 +46,55 @@ PaddleSeg是基于飞桨[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的 * **高性能**:支持多进程异步I/O、多卡并行训练、评估等加速策略,结合飞桨核心框架的显存优化功能,可大幅度减少分割模型的训练开销,让开发者更低成本、更高效地完成图像分割训练。 * :heart:**您可以前往 [完整PaddleSeg在线使用文档目录](https://paddleseg.readthedocs.io) 获得更详细的说明文档**:heart: +---------- + + +## 直播课回放 + +✨直播课回放--全球冠军带你实现产业级图像分割✨ + +* 学习链接:https://aistudio.baidu.com/aistudio/education/group/info/24590 + +* Day① 顶会冠军图像分割算法深度解密 + +* Day② 高精度人像分割算法及应用 + +* Day③ 交互式分割及破圈应用 ## 技术交流 * 如果你发现任何PaddleSeg存在的问题或者是建议, 欢迎通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues)给我们提issues。 -* 欢迎加入PaddleSeg微信群(左侧)和QQ群(右侧) +* 欢迎加入PaddleSegQQ群 +
+ +
+ +## 模型说明 + +[Model Zoo](./configs/) +
- +
-## 模型库 - -|模型\骨干网络|ResNet50|ResNet101|HRNetw18|HRNetw48| -|-|-|-|-|-| -|[ANN](./configs/ann)|✔|✔||| -|[BiSeNetv2](./configs/bisenet)|-|-|-|-| -|[DANet](./configs/danet)|✔|✔||| -|[Deeplabv3](./configs/deeplabv3)|✔|✔||| -|[Deeplabv3P](./configs/deeplabv3p)|✔|✔||| -|[Fast-SCNN](./configs/fastscnn)|-|-|-|-| -|[FCN](./configs/fcn)|||✔|✔| -|[GCNet](./configs/gcnet)|✔|✔||| -|[GSCNN](./configs/gscnn)|✔|✔||| -|[HarDNet](./configs/hardnet)|-|-|-|-| -|[OCRNet](./configs/ocrnet/)|||✔|✔| -|[PSPNet](./configs/pspnet)|✔|✔||| -|[U-Net](./configs/unet)|-|-|-|-| -|[U2-Net](./configs/u2net)|-|-|-|-| -|[Att U-Net](./configs/attention_unet)|-|-|-|-| -|[U-Net++](./configs/unet_plusplus)|-|-|-|-| -|[U-Net3+](./configs/unet_3plus)|-|-|-|-| -|[DecoupledSegNet](./configs/decoupled_segnet)|✔|✔||| -|[EMANet](./configs/emanet)|✔|✔|-|-| -|[ISANet](./configs/isanet)|✔|✔|-|-| -|[DNLNet](./configs/dnlnet)|✔|✔|-|-| -|[SFNet](./configs/sfnet)|✔|-|-|-| -|[PPSegLite](./configs/ppseg_lite)|-|-|-|-| ## 使用教程 -* [安装](./docs/install.md) -* [全流程跑通PaddleSeg](./docs/quick_start.md) -* 数据处理 - * [数据格式说明](./docs/data/marker/marker_c.md) - * [数据标注和转换](./docs/data/transform/transform_c.md) - * [自定义数据集](./docs/data/custom/data_prepare.md) - -* PaddleSeg的设计思想 - * [配置文件详解](./docs/design/use/use.md) - * [如何创造自己的模型](./docs/design/create/add_new_model.md) -* [模型训练](/docs/train/train.md) +* [安装](./docs/install_cn.md) +* [全流程跑通PaddleSeg](./docs/whole_process_cn.md) +* 准备数据集 + * [标注数据的准备](./docs/data/marker/marker_cn.md) + * [数据标注教程](./docs/data/transform/transform_cn.md) + * [自定义数据集](./docs/data/custom/data_prepare_cn.md) + +* PaddleSeg二次开发教程 + * [配置文件详解](./docs/design/use/use_cn.md) + * [如何创造自己的模型](./docs/design/create/add_new_model_cn.md) +* [模型训练](/docs/train/train_cn.md) * [模型评估](./docs/evaluation/evaluate/evaluate.md) +* [预测与可视化](./docs/predict/predict_cn.md) +* [模型导出](./docs/export/export/model_export.md) * 模型部署 * [导出预测模型](./docs/model_export.md) @@ -112,15 +106,14 @@ PaddleSeg是基于飞桨[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的 * [推理Benchmark](./docs/deployment/inference/infer_benchmark.md) * [导出ONNX模型](./docs/model_export_onnx.md) * 模型压缩 - * [蒸馏](./docs/slim/distill/distill.md) * [量化](./docs/slim/quant/quant.md) * [裁剪](./docs/slim/prune/prune.md) * API使用教程 - * [API文档说明](./docs/apis) + * [API文档说明](./docs/apis/README_CN.md) * [API应用案例](./docs/api_example.md) * 重要模块说明 * [数据增强](./docs/module/data/data.md) - * [Loss说明](./docs/module/loss/lovasz_loss.md) + * [Loss说明](./docs/module/loss/losses_cn.md) * [Tricks](./docs/module/tricks/tricks.md) * 经典模型说明 * [DeeplabV3](./docs/models/deeplabv3.md) @@ -128,20 +121,20 @@ PaddleSeg是基于飞桨[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的 * [OCRNet](./docs/models/ocrnet.md) * [Fast-SCNN](./docs/models/fascnn.md) * [提交PR说明](./docs/pr/pr/pr.md) -* [FAQ](./docs/faq/faq/faq.md) +* [FAQ](./docs/faq/faq/faq_cn.md) ## 实践案例 -* [人像分割](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/contrib/HumanSeg) -* [医疗图像](./docs/solution/medical/medical.md) -* [遥感分割](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/contrib/remote_sensing) -* [全景分割](./contrib/PanopticDeepLab) +- [人像分割](./contrib/PP-HumanSeg) +- [医疗图像](./docs/solution/medical/medical.md) +- [遥感分割](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/contrib/remote_sensing) +- [全景分割](./contrib/PanopticDeepLab) ## 代码贡献 -* 非常感谢[jm12138](https://github.com/jm12138)贡献U2-Net模型。 -* 非常感谢[zjhellofss](https://github.com/zjhellofss)(傅莘莘)贡献Attention U-Net模型,和Dice loss损失函数。 -* 非常感谢[liuguoyu666](https://github.com/liguoyu666)贡献U-Net++模型。 +- 
非常感谢[jm12138](https://github.com/jm12138)贡献U2-Net模型。 +- 非常感谢[zjhellofss](https://github.com/zjhellofss)(傅莘莘)贡献Attention U-Net模型,和Dice loss损失函数。 +- 非常感谢[liuguoyu666](https://github.com/liguoyu666)贡献U-Net++模型。 ## 学术引用 diff --git a/script/run_fp16.sh b/benchmark/run_fp16.sh similarity index 100% rename from script/run_fp16.sh rename to benchmark/run_fp16.sh diff --git a/script/run_fp32.sh b/benchmark/run_fp32.sh similarity index 100% rename from script/run_fp32.sh rename to benchmark/run_fp32.sh diff --git a/configs/README.md b/configs/README.md index a36b3154b5..e2bd232a38 100644 --- a/configs/README.md +++ b/configs/README.md @@ -1,3 +1,71 @@ +# 模型库 + +|模型\骨干网络|ResNet50|ResNet101|HRNetw18|HRNetw48| +|-|-|-|-|-| +|[ANN](./ann)|✔|✔||| +|[BiSeNetv2](./bisenet)|-|-|-|-| +|[DANet](./danet)|✔|✔||| +|[Deeplabv3](./deeplabv3)|✔|✔||| +|[Deeplabv3P](./deeplabv3p)|✔|✔||| +|[Fast-SCNN](./fastscnn)|-|-|-|-| +|[FCN](./fcn)|||✔|✔| +|[GCNet](./gcnet)|✔|✔||| +|[GSCNN](./gscnn)|✔|✔||| +|[HarDNet](./hardnet)|-|-|-|-| +|[OCRNet](./ocrnet/)|||✔|✔| +|[PSPNet](./pspnet)|✔|✔||| +|[U-Net](./unet)|-|-|-|-| +|[U2-Net](./u2net)|-|-|-|-| +|[Att U-Net](./attention_unet)|-|-|-|-| +|[U-Net++](./unet_plusplus)|-|-|-|-| +|[U-Net3+](./unet_3plus)|-|-|-|-| +|[DecoupledSegNet](./decoupled_segnet)|✔|✔||| +|[EMANet](./emanet)|✔|✔|-|-| +|[ISANet](./isanet)|✔|✔|-|-| +|[DNLNet](./dnlnet)|✔|✔|-|-| +|[SFNet](./sfnet)|✔|-|-|-| +|[PP-HumanSeg-Lite](./pp_humanseg_lite)|-|-|-|-| + +# 模型说明 + +
+ +
+ +# 模型性能参数 + + +|Model|Backbone|Resolution|Training Iters|mIoU|mIoU(flip)|mIoU(ms+flip)|predict_time(ms)| +|-|-|-|-|-|-|-|-| +|ANN|ResNet101|1024x512|80000|79.50%|79.77%|79.69%|365| +|BiSeNetv2|/|1024x1024|160000|73.19%|74.19%|74.43%|12| +|DANet|ResNet50|1024x512|80000|80.27%|80.53%|/|475| +|Deeplabv3|ResNet101_OS8|1024x512|80000|80.85%|81.09%|81.54%|314| +|Deeplabv3P|ResNet50_OS8|1024x512|80000|81.10%|81.38%|81.24%|157| +|Fast-SCNN|/|1024x1024|160000|69.31%|/|/|28| +|FCN|HRNet_W48|1024x512|80000|80.70%|81.24%|81.56%|49| +|GCNet|ResNet101_OS8|1024x512|80000|81.01%|81.30%|81.64%|339| +|GSCNN|ResNet50_OS8|1024x512|80000|80.67%|80.88%|80.88%|/| +|HarDNet|/|1024x1024|160000|79.03%|79.49%|79.76%|30| +|OCRNet|HRNet_W48|1024x512|160000|82.15%|82.59%|82.85%|79| +|PSPNet|ResNet101_OS8|1024x512|80000|80.48%|80.74%|81.04%|415| +|U-Net|/|1024x512|160000|65.00%|66.02%|66.89%|63| +|U^2-Net|/|1024x512|160000|71.65%|/|148| +|Att U-Net|/|/|1024x512|/|/|/|/|/| +|U-Net++|/|1024x512|/|/|/|/|/| +|DecoupledSegNet|ResNet50_OS8|1024x512|80000|81.26%|81.56%|81.80%|239| +|EMANet|ResNet101_OS8|1024x512|80000|80.00%|80.23%|80.53%|303| +|ISANet|ResNet101_OS8|769x769|80000|80.10%|80.30%|80.26%|304| +|DNLNet|ResNet101_OS8|1024x512|80000|81.03%|81.38%|/|303| +|SFNet|ResNet18_OS8|1024x1024|80000|81.49%|81.63%|81.85%|28| + + + +- 表格展示了PaddleSeg所实现的分割模型在取得最高分类精度的配置下的一些评价参数。 +- 其中,mIoU、mIoU(flip)、mIoU(ms+flip)是对模型进行评估的结果。`ms` 表示**multi-scale**,即使用三种scale [0.75, 1.0, 1.25];`flip`表示水平翻转。 +- 推理时间是使用CityScapes数据集中的图像进行100次预测取平均值的结果。 +- 测试条件为Tesla V100 16GB。 + # 配置项 ---- @@ -70,56 +138,4 @@ > * 参数 > * transforms : 预测时的预处理操作,支持配置的transforms与`train_dataset`、`val_dataset`等相同。如果不填写该项,默认只会对数据进行归一化标准化操作。 -# 示例 - -```yaml -batch_size: 4 -iters: 80000 - -train_dataset: - type: Cityscapes - dataset_root: data/cityscapes - transforms: - - type: ResizeStepScaling - min_scale_factor: 0.5 - max_scale_factor: 2.0 - scale_step_size: 0.25 - - type: RandomPaddingCrop - crop_size: [1024, 512] - - type: RandomHorizontalFlip - - type: Normalize - mode: train - -val_dataset: - type: Cityscapes - dataset_root: data/cityscapes - transforms: - - type: Normalize - mode: val - -optimizer: - type: sgd - momentum: 0.9 - weight_decay: 4.0e-5 - -lr_scheduler: - type: PolynomialDecay - learning_rate: 0.01 - power: 0.9 - end_lr: 0 - -loss: - types: - - type: CrossEntropyLoss - coef: [1] - -model: - type: FCN - backbone: - type: HRNet_W18 - pretrained: pretrained_model/hrnet_w18_ssld - num_classes: 19 - pretrained: Null - backbone_indices: [-1] - -``` +具体配置文件说明请参照[配置文件详解](../docs/design/use/use_cn.md) diff --git a/configs/pp_humanseg_lite/README.md b/configs/pp_humanseg_lite/README.md new file mode 100644 index 0000000000..9c7c8f5c3c --- /dev/null +++ b/configs/pp_humanseg_lite/README.md @@ -0,0 +1,6 @@ +# PP-HumanSeg-Lite + +自研超轻量级模型,适用于Web端或移动端实时分割场景。 + +## Performance +Refer to [人像分割PP-HumanSeg](../../contrib/PP-HumanSeg). 
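The PP-HumanSeg-Lite configs added in the next two files (training on `mini_supervisely` and export at 398x224) follow the usual PaddleSeg workflow. As a rough usage sketch only — it assumes the repository's standard `train.py` and `export.py` entry points and an already-prepared `data/mini_supervisely` directory, none of which is shown in this patch — training and export might look like this:

```shell
# Sketch only (not part of this patch): train PP-HumanSeg-Lite on mini_supervisely,
# then export a 398x224 inference model. Paths mirror the configs added below;
# adjust them to your local setup.
python train.py \
    --config configs/pp_humanseg_lite/pp_humanseg_lite_mini_supervisely.yml \
    --do_eval --save_interval 500 --save_dir output/pp_humanseg_lite

python export.py \
    --config configs/pp_humanseg_lite/pp_humanseg_lite_export_398x224.yml \
    --model_path output/pp_humanseg_lite/best_model/model.pdparams \
    --save_dir output/pp_humanseg_lite/export
```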
diff --git a/configs/pp_humanseg_lite/pp_humanseg_lite_export_398x224.yml b/configs/pp_humanseg_lite/pp_humanseg_lite_export_398x224.yml new file mode 100644 index 0000000000..3d17b74caa --- /dev/null +++ b/configs/pp_humanseg_lite/pp_humanseg_lite_export_398x224.yml @@ -0,0 +1,22 @@ + +model: + type: PPSegLite + align_corners: False + num_classes: 2 + +export: + transforms: + - type: Resize + target_size: [398, 224] + - type: Normalize + +val_dataset: + type: Dataset + dataset_root: data/mini_supervisely + val_path: data/mini_supervisely/val.txt + num_classes: 2 + transforms: + - type: Resize + target_size: [398, 224] + - type: Normalize + mode: val diff --git a/configs/pp_humanseg_lite/pp_humanseg_lite_mini_supervisely.yml b/configs/pp_humanseg_lite/pp_humanseg_lite_mini_supervisely.yml new file mode 100644 index 0000000000..5dca39f002 --- /dev/null +++ b/configs/pp_humanseg_lite/pp_humanseg_lite_mini_supervisely.yml @@ -0,0 +1,56 @@ +batch_size: 64 +iters: 2000 + +train_dataset: + type: Dataset + dataset_root: data/mini_supervisely + train_path: data/mini_supervisely/train.txt + num_classes: 2 + transforms: + - type: Resize + target_size: [398, 224] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.4 + contrast_range: 0.4 + saturation_range: 0.4 + - type: Normalize + mode: train + +val_dataset: + type: Dataset + dataset_root: data/mini_supervisely + val_path: data/mini_supervisely/val.txt + num_classes: 2 + transforms: + - type: Resize + target_size: [398, 224] + - type: Normalize + mode: val + +export: + transforms: + - type: Resize + target_size: [398, 224] + - type: Normalize + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0005 + +lr_scheduler: + type: PolynomialDecay + learning_rate: 0.05 + end_lr: 0 + power: 0.9 + +loss: + types: + - type: CrossEntropyLoss + coef: [1] + +model: + type: PPSegLite + align_corners: False + num_classes: 2 diff --git a/configs/swin_transformer/swin_transformer_fcn_base_patch4_window7_160k_cityscapes.yml b/configs/swin_transformer/swin_transformer_fcn_base_patch4_window7_160k_cityscapes.yml deleted file mode 100644 index 2c35b9d984..0000000000 --- a/configs/swin_transformer/swin_transformer_fcn_base_patch4_window7_160k_cityscapes.yml +++ /dev/null @@ -1,23 +0,0 @@ -_base_: '../_base_/cityscapes.yml' - -batch_size: 1 -iters: 160000 - -model: - type: FCN - backbone: - type: SwinTransformer_base_patch4_window7_224 - ape: False - drop_path_rate: 0.3 - patch_norm: True - pretrained: https://bj.bcebos.com/paddleseg/dygraph/swin_transformer_base_patch4_window7_224_imagenet_1k.tar.gz - out_indices: [3] - num_classes: 19 - -optimizer: - weight_decay: 0.0001 - -loss: - types: - - type: CrossEntropyLoss - coef: [1] diff --git a/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_autonue.yml b/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_autonue.yml deleted file mode 100644 index 1d084ceb7e..0000000000 --- a/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_autonue.yml +++ /dev/null @@ -1,21 +0,0 @@ -_base_: '../_base_/autonue.yml' - -model: - type: MLATransformer - backbone: - type: SwinTransformer_base_patch4_window7_224 - ape: False - drop_path_rate: 0.3 - patch_norm: True - pretrained: https://bj.bcebos.com/paddleseg/dygraph/swin_transformer_base_patch4_window7_224_imagenet_1k.tar.gz - num_classes: 26 - in_channels: [128, 256, 512, 1024] - mlahead_channels: 128 - -iters: 160000 - -loss: - types: - - type: CrossEntropyLoss - - type: CrossEntropyLoss - coef: 
[1, 0.4]
diff --git a/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_cityscapes.yml b/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_cityscapes.yml
deleted file mode 100644
index ce38c460f1..0000000000
--- a/configs/swin_transformer/swin_transformer_mla_base_patch4_window7_160k_cityscapes.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-_base_: '../_base_/cityscapes.yml'
-
-model:
-  type: MLATransformer
-  backbone:
-    type: SwinTransformer_base_patch4_window7_224
-    ape: False
-    drop_path_rate: 0.3
-    patch_norm: True
-    pretrained: https://bj.bcebos.com/paddleseg/dygraph/swin_transformer_base_patch4_window7_224_imagenet_1k.tar.gz
-  num_classes: 19
-  in_channels: [128, 256, 512, 1024]
-  mlahead_channels: 128
-
-iters: 160000
-
-loss:
-  types:
-    - type: CrossEntropyLoss
-    - type: CrossEntropyLoss
-  coef: [1, 0.4]
diff --git a/contrib/AutoNUE/README.md b/contrib/AutoNUE/README.md
new file mode 100644
index 0000000000..0e8bc3ed79
--- /dev/null
+++ b/contrib/AutoNUE/README.md
@@ -0,0 +1,128 @@
+# AutoNUE@CVPR 2021 Challenge
+Implementation of the 1st place solution for the AutoNUE@CVPR 2021 Challenge Semantic Segmentation Track, based on PaddlePaddle.
+
+## Installation
+
+#### step 1. Install PaddlePaddle
+
+System Requirements:
+* PaddlePaddle >= 2.0.0
+* Python >= 3.6
+
+We highly recommend installing the GPU version of PaddlePaddle: segmentation models have a large memory overhead, and running them without a GPU can easily run out of memory. For more detailed installation tutorials, please refer to the official [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/2.0/install/) website.
+
+
+#### step 2. Install PaddleSeg
+
+Install PaddleSeg as a library (the *API calling* mode) for flexible development:
+
+```shell
+pip install paddleseg -U
+```
+
+## Data Preparation
+
+First, download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5) following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
+
+Then organize the data in the following structure.
+
+    IDD_Segmentation
+    |
+    |--leftImg8bit
+    |  |--train
+    |  |--val
+    |  |--test
+    |
+    |--gtFine
+    |  |--train
+    |  |--val
+    |  |--test
+
+We made three main contributions and managed to rank 1st:
+- Regional progressive segmentation
+- Leveraging the IDD_Detection dataset to generate extra training samples by pseudo-labeling
+- Decoder-enhanced Swin Transformer
+
+## Training
+
+### Baseline
+1. Download the weights pretrained on Mapillary.
+
+```shell
+mkdir -p pretrain && cd pretrain
+wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/ocrnet_hrnetw48_mapillary/pretrained.pdparams
+cd ..
+```
+2. Modify `scripts/train.py` line 27 to `from core.val import evaluate`.
+3. Run the training script.
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
+--config configs/sscale_auto_nue_map+city@1920.yml --use_vdl \
+--save_dir saved_model/sscale_auto_nue_map+city@1920 --save_interval 2000 --num_workers 2 --do_eval
+```
+
+### Regional progressive segmentation
+1. Replace `scripts/train.py` line 27 `from core.val import evaluate` with `from core.val_crop import evaluate`.
+2.
Run the training script.
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
+--config configs/auto_nue_map+city_crop.yml --use_vdl \
+--save_dir saved_model/auto_nue_map+city_crop --save_interval 2000 --num_workers 2 --do_eval
+```
+
+### Pseudo-labeling
+First, organize the IDD_Detection dataset as follows:
+
+
+    IDD_Detection
+    |
+    |--JPEGImages
+    |--Annotations
+
+
+where `JPEGImages` and `Annotations` hold the images and XML files collected from the `IDD_Detection/FrontFar` and `IDD_Detection/FrontNear` folders.
+
+Then:
+1. Replace `AutoNUE21/predict.py` line 22 `from paddleseg.core import predict` with `from core.predict_generate_autolabel import predictAutolabel`.
+2. Modify `AutoNUE21/predict.py` line 156, replacing `predict(` with `predictAutolabel(`.
+3. Run
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict.py --config configs/sscale_auto_nue_map+city@1920.yml --model_path saved_model/sscale_auto_nue_map+city@1920/best_model/model.pdparams --image_path data/IDD_Detection/JPEGImages --save_dir detection_out --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal
+```
+4. Auto-box the `traffic lights` and `traffic sign` classes from the bounding box annotations by running `tools/IDD_labeling.py`.
+5. Put the generated `pred_refine` folder under `data/IDD_Detection`.
+6. Modify `scripts/train.py` line 27 to `from core.val import evaluate`.
+7. Train on these pseudo labels together with the fine-annotated samples:
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
+--config configs/auto_nue_auto_label.yml --use_vdl \
+--save_dir saved_model/auto_nue_auto_label --save_interval 2000 --num_workers 2 --do_eval
+```
+
+### Decoder-enhanced Swin Transformer
+
+1. Download the weights pretrained on Mapillary.
+
+```shell
+cd pretrain
+wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/swin_mla_p4w7_mapillary/pretrained_swin.pdparams
+cd ..
+```
+
+2. Run the training script.
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
+--config configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml --use_vdl \
+--save_dir saved_model/swin_transformer_mla_autonue --save_interval 2000 --num_workers 2 --do_eval
+```
+3. Run the testing script.
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict.py --config configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml --model_path saved_model/swin_transformer_mla_autonue/best_model/model.pdparams --image_path data/IDD_Segmentation/leftImg8bit/test/ --save_dir test_out_swin --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal
+```
+
+## Ensemble Testing
+We provide a predict script for ensembling the `baseline`, `pseudo-labeling` and `rps` models.
+Just running: +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m paddle.distributed.launch predict_ensemble_three.py --config configs/sscale_auto_nue_map+city@1920.yml --config_1 configs/auto_nue_auto_label.yml --config_crop configs/auto_nue_map+city_crop.yml --model_path saved_model/sscale_auto_nue_map+city@1920/best_model/model.pdparams --model_path_1 saved_model/auto_nue_auto_label/best_model/model.pdparams --model_path_crop saved_model/auto_nue_map+city_crop/best_model/model.pdparams --image_path data/IDD_Segmentation/leftImg8bit/test/ --save_dir test_out --aug_pred --scales 1.0 1.5 2.0 --flip_horizontal +``` diff --git a/contrib/AutoNUE/configs/auto_nue_auto_label.yml b/contrib/AutoNUE/configs/auto_nue_auto_label.yml new file mode 100755 index 0000000000..5f4cabb720 --- /dev/null +++ b/contrib/AutoNUE/configs/auto_nue_auto_label.yml @@ -0,0 +1,74 @@ +batch_size: 1 +iters: 80000 + +model: + type: MscaleOCRNet + pretrained: pretrain/pretrained.pdparams + n_scales: [1.0] + backbone: + type: HRNet_W48_NV + num_classes: 26 + backbone_indices: [0] + +train_dataset: + type: AutoNueAutolabel + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0 + - type: RandomPaddingCrop + crop_size: [1920, 1080] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.25 + brightness_prob: 1 + contrast_range: 0.25 + contrast_prob: 1 + saturation_range: 0.25 + saturation_prob: 1 + hue_range: 63 + hue_prob: 1 + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: train + + +val_dataset: + type: AutoNueAutolabel + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: val + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0001 + +learning_rate: + value: 0.02 + decay: + type: poly + power: 2 + end_lr: 0.0 + +loss: + types: + - type: DiceLoss + - type: DiceLoss + - type: BootstrappedCrossEntropyLoss + min_K: 50000 + loss_th: 0.05 + - type: BootstrappedCrossEntropyLoss + min_K: 50000 + loss_th: 0.05 + coef: [0.4, 0.16, 1.0, 0.4] diff --git a/contrib/AutoNUE/configs/auto_nue_map+city_crop.yml b/contrib/AutoNUE/configs/auto_nue_map+city_crop.yml new file mode 100755 index 0000000000..23ee156ea3 --- /dev/null +++ b/contrib/AutoNUE/configs/auto_nue_map+city_crop.yml @@ -0,0 +1,68 @@ +batch_size: 1 +iters: 85000 + +model: + type: MscaleOCRNet + pretrained: pretrain/pretrained.pdparams + n_scales: [1.0] + backbone: + type: HRNet_W48_NV + num_classes: 26 + backbone_indices: [0] + +train_dataset: + type: AutoNueCrop + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [3200, 1800] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.25 + brightness_prob: 1 + contrast_range: 0.25 + contrast_prob: 1 + saturation_range: 0.25 + saturation_prob: 1 + hue_range: 63 + hue_prob: 1 + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: train + + +val_dataset: + type: AutoNueCrop + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [3200, 1800] + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: val + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0001 + +learning_rate: + value: 0.005 + decay: + type: poly + power: 2 + end_lr: 0.0 + +loss: + 
types: + - type: DiceLoss + - type: DiceLoss + - type: BootstrappedCrossEntropyLoss + min_K: 50000 + loss_th: 0.05 + - type: BootstrappedCrossEntropyLoss + min_K: 50000 + loss_th: 0.05 + coef: [0.4, 0.16, 1.0, 0.4] diff --git a/contrib/AutoNUE/configs/mscale_auto_nue_map+city@1920.yml b/contrib/AutoNUE/configs/mscale_auto_nue_map+city@1920.yml new file mode 100755 index 0000000000..d85824ffdd --- /dev/null +++ b/contrib/AutoNUE/configs/mscale_auto_nue_map+city@1920.yml @@ -0,0 +1,74 @@ +batch_size: 1 +iters: 80000 + +model: + type: MscaleOCRNet + pretrained: saved_model/sscale_ocr_auto_nue_map+city_ce+dice@1920/best_model/model.pdparams + n_scales: [1.0, 1.5, 2.0] + backbone: + type: HRNet_W48_NV + num_classes: 26 + backbone_indices: [0] + +train_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0 + - type: RandomPaddingCrop + crop_size: [1920, 1080] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.25 + brightness_prob: 1 + contrast_range: 0.25 + contrast_prob: 1 + saturation_range: 0.25 + saturation_prob: 1 + hue_range: 63 + hue_prob: 1 + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: train + + +val_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: val + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0001 + +learning_rate: + value: 0.005 + decay: + type: poly + power: 2 + end_lr: 0.0 + +loss: + types: + - type: DiceLoss + - type: DiceLoss + - type: BootstrappedCrossEntropyLoss + min_K: 100000 + loss_th: 0.05 + - type: BootstrappedCrossEntropyLoss + min_K: 100000 + loss_th: 0.05 + coef: [1, 0.4, 1, 0.4] diff --git a/contrib/AutoNUE/configs/sscale_auto_nue_map+city@1920.yml b/contrib/AutoNUE/configs/sscale_auto_nue_map+city@1920.yml new file mode 100755 index 0000000000..dd7b5b869b --- /dev/null +++ b/contrib/AutoNUE/configs/sscale_auto_nue_map+city@1920.yml @@ -0,0 +1,74 @@ +batch_size: 1 +iters: 80000 + +model: + type: MscaleOCRNet + pretrained: pretrain/pretrained.pdparams + n_scales: [1.0] + backbone: + type: HRNet_W48_NV + num_classes: 26 + backbone_indices: [0] + +train_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0 + - type: RandomPaddingCrop + crop_size: [1920, 1080] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.25 + brightness_prob: 1 + contrast_range: 0.25 + contrast_prob: 1 + saturation_range: 0.25 + saturation_prob: 1 + hue_range: 63 + hue_prob: 1 + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: train + + +val_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: val + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0001 + +learning_rate: + value: 0.01 + decay: + type: poly + power: 2 + end_lr: 0.0 + +loss: + types: + - type: DiceLoss + - type: DiceLoss + - type: BootstrappedCrossEntropyLoss + min_K: 100000 + loss_th: 0.05 + - type: BootstrappedCrossEntropyLoss + min_K: 100000 + 
loss_th: 0.05 + coef: [1.0, 0.4, 1.0, 0.4] diff --git a/contrib/AutoNUE/configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml b/contrib/AutoNUE/configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml new file mode 100644 index 0000000000..dfb5fc27dc --- /dev/null +++ b/contrib/AutoNUE/configs/swin_transformer_mla_base_patch4_window7_160k_autonue.yml @@ -0,0 +1,73 @@ +batch_size: 1 +iters: 160000 + +model: + type: MLATransformer + pretrained: pretrain/pretrained_swin.pdparams + backbone: + type: SwinTransformer_base_patch4_window7_224 + ape: False + drop_path_rate: 0.3 + patch_norm: True + num_classes: 26 + in_channels: [128, 256, 512, 1024] + mlahead_channels: 128 + +train_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [1920, 1080] + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0.25 + - type: RandomPaddingCrop + crop_size: [1024, 512] + - type: RandomHorizontalFlip + - type: RandomDistort + brightness_range: 0.25 + brightness_prob: 1 + contrast_range: 0.25 + contrast_prob: 1 + saturation_range: 0.25 + saturation_prob: 1 + hue_range: 63 + hue_prob: 1 + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: train + + +val_dataset: + type: AutoNue + dataset_root: data/IDD_Segmentation + transforms: + - type: Resize + target_size: [256, 256] #[1920, 1080] + - type: Normalize + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + mode: val + + +optimizer: + type: sgd + momentum: 0.9 + weight_decay: 0.0001 + +lr_scheduler: + type: PolynomialDecay + learning_rate: 0.005 + end_lr: 0 + power: 2 + +iters: 160000 + +loss: + types: + - type: CrossEntropyLoss + - type: CrossEntropyLoss + coef: [1, 0.4] diff --git a/legacy/contrib/HumanSeg/transforms/__init__.py b/contrib/AutoNUE/core/__init__.py old mode 100644 new mode 100755 similarity index 79% rename from legacy/contrib/HumanSeg/transforms/__init__.py rename to contrib/AutoNUE/core/__init__.py index b537d3216c..dedd6d70cb --- a/legacy/contrib/HumanSeg/transforms/__init__.py +++ b/contrib/AutoNUE/core/__init__.py @@ -1,5 +1,4 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .transforms import * -from . import functional +from .predict_ensemble import predictEnsemble + +__all__ = ['predictEnsemble'] diff --git a/contrib/AutoNUE/core/infer.py b/contrib/AutoNUE/core/infer.py new file mode 100755 index 0000000000..05ebb8ed58 --- /dev/null +++ b/contrib/AutoNUE/core/infer.py @@ -0,0 +1,262 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import collections.abc +from itertools import combinations + +import numpy as np +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape, transforms): + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. + """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize', 'ResizeByLong']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred, ori_shape, transforms): + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + pred = F.interpolate(pred, (h, w), mode='nearest') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +def flip_combination(flip_horizontal=False, flip_vertical=False): + """ + Get flip combination. + + Args: + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + + Returns: + list: List of tuple. The first element of tuple is whether to flip horizontally, + and the second is whether to flip vertically. + """ + + flip_comb = [(False, False)] + if flip_horizontal: + flip_comb.append((True, False)) + if flip_vertical: + flip_comb.append((False, True)) + if flip_horizontal: + flip_comb.append((True, True)) + return flip_comb + + +def tensor_flip(x, flip): + """Flip tensor according directions""" + if flip[0]: + x = x[:, :, :, ::-1] + if flip[1]: + x = x[:, :, ::-1, :] + return x + + +def slide_inference(model, im, crop_size, stride): + """ + Infer by sliding window. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + crop_size (tuple|list). The size of sliding window, (w, h). + stride (tuple|list). The size of stride, (w, h). + + Return: + Tensor: The logit of input image. + """ + h_im, w_im = im.shape[-2:] + w_crop, h_crop = crop_size + w_stride, h_stride = stride + # calculate the crop nums + rows = np.int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1 + cols = np.int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1 + # prevent negative sliding rounds when imgs after scaling << crop_size + rows = 1 if h_im <= h_crop else rows + cols = 1 if w_im <= w_crop else cols + # TODO 'Tensor' object does not support item assignment. If support, use tensor to calculation. 
+ final_logit = None + count = np.zeros([1, 1, h_im, w_im]) + for r in range(rows): + for c in range(cols): + h1 = r * h_stride + w1 = c * w_stride + h2 = min(h1 + h_crop, h_im) + w2 = min(w1 + w_crop, w_im) + h1 = max(h2 - h_crop, 0) + w1 = max(w2 - w_crop, 0) + im_crop = im[:, :, h1:h2, w1:w2] + logits = model(im_crop) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}" + .format(type(logits))) + logit = logits[0].numpy() + if final_logit is None: + final_logit = np.zeros([1, logit.shape[1], h_im, w_im]) + final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1] + count[:, :, h1:h2, w1:w2] += 1 + if np.sum(count == 0) != 0: + raise RuntimeError( + 'There are pixel not predicted. It is possible that stride is greater than crop_size' + ) + final_logit = final_logit / count + final_logit = paddle.to_tensor(final_logit) + return final_logit + + +def inference(model, + im, + ori_shape=None, + transforms=None, + is_slide=False, + stride=None, + crop_size=None): + """ + Inference for image. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + is_slide (bool): Whether to infer by sliding window. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. + If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned. + """ + if not is_slide: + logits = model(im) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}" + .format(type(logits))) + logit = logits[0] + else: + logit = slide_inference(model, im, crop_size=crop_size, stride=stride) + if ori_shape is not None: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + pred = reverse_transform(pred, ori_shape, transforms) + return pred + else: + return logit + + +def aug_inference(model, + im, + ori_shape, + transforms, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + Infer with augmentation. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + scales (float|tuple|list): Scales for resize. Default: 1. + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + is_slide (bool): Whether to infer by sliding wimdow. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: Prediction of image with shape (1, 1, h, w) is returned. 
+ """ + if isinstance(scales, float): + scales = [scales] + elif not isinstance(scales, (tuple, list)): + raise TypeError( + '`scales` expects float/tuple/list type, but received {}'.format( + type(scales))) + final_logit = 0 + h_input, w_input = im.shape[-2], im.shape[-1] + flip_comb = flip_combination(flip_horizontal, flip_vertical) + for scale in scales: + h = int(h_input * scale + 0.5) + w = int(w_input * scale + 0.5) + im = F.interpolate(im, (h, w), mode='bilinear') + for flip in flip_comb: + im_flip = tensor_flip(im, flip) + logit = inference( + model, + im_flip, + is_slide=is_slide, + crop_size=crop_size, + stride=stride) + logit = tensor_flip(logit, flip) + logit = F.interpolate(logit, (h_input, w_input), mode='bilinear') + + logit = F.softmax(logit, axis=1) + final_logit = final_logit + logit + + pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32') + pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest') + return pred diff --git a/contrib/AutoNUE/core/infer_crop.py b/contrib/AutoNUE/core/infer_crop.py new file mode 100755 index 0000000000..90cd3ff4c7 --- /dev/null +++ b/contrib/AutoNUE/core/infer_crop.py @@ -0,0 +1,260 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections.abc +from itertools import combinations + +import numpy as np +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape, transforms): + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. 
+ """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize', 'ResizeByLong']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred, ori_shape, transforms): + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + pred = F.interpolate(pred, (h, w), mode='nearest') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +def flip_combination(flip_horizontal=False, flip_vertical=False): + """ + Get flip combination. + + Args: + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + + Returns: + list: List of tuple. The first element of tuple is whether to flip horizontally, + and the second is whether to flip vertically. + """ + + flip_comb = [(False, False)] + if flip_horizontal: + flip_comb.append((True, False)) + if flip_vertical: + flip_comb.append((False, True)) + if flip_horizontal: + flip_comb.append((True, True)) + return flip_comb + + +def tensor_flip(x, flip): + """Flip tensor according directions""" + if flip[0]: + x = x[:, :, :, ::-1] + if flip[1]: + x = x[:, :, ::-1, :] + return x + + +def slide_inference(model, im, crop_size, stride): + """ + Infer by sliding window. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + crop_size (tuple|list). The size of sliding window, (w, h). + stride (tuple|list). The size of stride, (w, h). + + Return: + Tensor: The logit of input image. + """ + h_im, w_im = im.shape[-2:] + w_crop, h_crop = crop_size + w_stride, h_stride = stride + # calculate the crop nums + rows = np.int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1 + cols = np.int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1 + # prevent negative sliding rounds when imgs after scaling << crop_size + rows = 1 if h_im <= h_crop else rows + cols = 1 if w_im <= w_crop else cols + # TODO 'Tensor' object does not support item assignment. If support, use tensor to calculation. + final_logit = None + count = np.zeros([1, 1, h_im, w_im]) + for r in range(rows): + for c in range(cols): + h1 = r * h_stride + w1 = c * w_stride + h2 = min(h1 + h_crop, h_im) + w2 = min(w1 + w_crop, w_im) + h1 = max(h2 - h_crop, 0) + w1 = max(w2 - w_crop, 0) + im_crop = im[:, :, h1:h2, w1:w2] + logits = model(im_crop) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. 
But received {}" + .format(type(logits))) + logit = logits[0].numpy() + if final_logit is None: + final_logit = np.zeros([1, logit.shape[1], h_im, w_im]) + final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1] + count[:, :, h1:h2, w1:w2] += 1 + if np.sum(count == 0) != 0: + raise RuntimeError( + 'There are pixel not predicted. It is possible that stride is greater than crop_size' + ) + final_logit = final_logit / count + final_logit = paddle.to_tensor(final_logit) + return final_logit + + +def inference(model, + im, + ori_shape=None, + transforms=None, + is_slide=False, + stride=None, + crop_size=None): + """ + Inference for image. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + is_slide (bool): Whether to infer by sliding window. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. + If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned. + """ + if not is_slide: + logits = model(im) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}" + .format(type(logits))) + logit = logits[0] + else: + logit = slide_inference(model, im, crop_size=crop_size, stride=stride) + if ori_shape is not None: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + pred = reverse_transform(pred, ori_shape, transforms) + return pred + else: + return logit + + +def aug_inference(model, + im, + ori_shape, + transforms, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + Infer with augmentation. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + scales (float|tuple|list): Scales for resize. Default: 1. + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + is_slide (bool): Whether to infer by sliding wimdow. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: Prediction of image with shape (1, 1, h, w) is returned. 
+ """ + if isinstance(scales, float): + scales = [scales] + elif not isinstance(scales, (tuple, list)): + raise TypeError( + '`scales` expects float/tuple/list type, but received {}'.format( + type(scales))) + final_logit = 0 + h_input, w_input = im.shape[-2], im.shape[-1] + flip_comb = flip_combination(flip_horizontal, flip_vertical) + for scale in scales: + h = int(h_input * scale + 0.5) + w = int(w_input * scale + 0.5) + im = F.interpolate(im, (h, w), mode='bilinear') + for flip in flip_comb: + im_flip = tensor_flip(im, flip) + logit = inference( + model, + im_flip, + is_slide=is_slide, + crop_size=crop_size, + stride=stride) + logit = tensor_flip(logit, flip) + logit = F.interpolate(logit, (h_input, w_input), mode='bilinear') + + logit = F.softmax(logit, axis=1) + final_logit = final_logit + logit + + return final_logit diff --git a/contrib/AutoNUE/core/infer_ensemble.py b/contrib/AutoNUE/core/infer_ensemble.py new file mode 100755 index 0000000000..d5e92907de --- /dev/null +++ b/contrib/AutoNUE/core/infer_ensemble.py @@ -0,0 +1,216 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections.abc +from itertools import combinations + +import numpy as np +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape, transforms): + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. 
+ """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize', 'ResizeByLong']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred, ori_shape, transforms): + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + pred = F.interpolate(pred, (h, w), mode='nearest') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +def flip_combination(flip_horizontal=False, flip_vertical=False): + """ + Get flip combination. + + Args: + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + + Returns: + list: List of tuple. The first element of tuple is whether to flip horizontally, + and the second is whether to flip vertically. + """ + + flip_comb = [(False, False)] + if flip_horizontal: + flip_comb.append((True, False)) + if flip_vertical: + flip_comb.append((False, True)) + if flip_horizontal: + flip_comb.append((True, True)) + return flip_comb + + +def tensor_flip(x, flip): + """Flip tensor according directions""" + if flip[0]: + x = x[:, :, :, ::-1] + if flip[1]: + x = x[:, :, ::-1, :] + return x + + +def inference(model, + model_hard, + im, + ori_shape=None, + transforms=None, + is_slide=False, + stride=None, + crop_size=None): + """ + Inference for image. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + is_slide (bool): Whether to infer by sliding window. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. + If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned. 
+ """ + if not is_slide: + # logits = model(im)[0] + # logits_hard = model_hard(im)[0] + + logits = F.softmax(model(im)[0], axis=1) + logits_hard = F.softmax(model_hard(im)[0], axis=1) + + # logit_hard = logits.clone() + # for ii in range(logits.shape[0]): + # logit_hard[ii] = paddle.scatter(logit_hard[ii], paddle.to_tensor([3, 7, 12, 14, 15, 16, 18, 19, 20, 21]), logits_hard[ii][1:], overwrite=True, name=None) + logit = (logits + logits_hard) / 2 + # logit = logit_hard + + if ori_shape is not None: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + pred = reverse_transform(pred, ori_shape, transforms) + return pred + else: + return logit + + +def aug_inference(model, + model_hard, + im, + ori_shape, + transforms, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + Infer with augmentation. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + scales (float|tuple|list): Scales for resize. Default: 1. + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + is_slide (bool): Whether to infer by sliding wimdow. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: Prediction of image with shape (1, 1, h, w) is returned. + """ + if isinstance(scales, float): + scales = [scales] + elif not isinstance(scales, (tuple, list)): + raise TypeError( + '`scales` expects float/tuple/list type, but received {}'.format( + type(scales))) + final_logit = 0 + h_input, w_input = im.shape[-2], im.shape[-1] + flip_comb = flip_combination(flip_horizontal, flip_vertical) + for scale in scales: + h = int(h_input * scale + 0.5) + w = int(w_input * scale + 0.5) + im = F.interpolate(im, (h, w), mode='bilinear') + for flip in flip_comb: + im_flip = tensor_flip(im, flip) + logit = inference( + model, + model_hard, + im_flip, + is_slide=is_slide, + crop_size=crop_size, + stride=stride) + logit = tensor_flip(logit, flip) + logit = F.interpolate(logit, (h_input, w_input), mode='bilinear') + + # logit = F.softmax(logit, axis=1) + final_logit = final_logit + logit + + pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32') + pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest') + return pred diff --git a/contrib/AutoNUE/core/infer_ensemble_three.py b/contrib/AutoNUE/core/infer_ensemble_three.py new file mode 100644 index 0000000000..4b216d9d8f --- /dev/null +++ b/contrib/AutoNUE/core/infer_ensemble_three.py @@ -0,0 +1,214 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import collections.abc +from itertools import combinations + +import numpy as np +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape, transforms): + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. + """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize', 'ResizeByLong']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred, ori_shape, transforms): + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + pred = F.interpolate(pred, (h, w), mode='nearest') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +def flip_combination(flip_horizontal=False, flip_vertical=False): + """ + Get flip combination. + + Args: + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + + Returns: + list: List of tuple. The first element of tuple is whether to flip horizontally, + and the second is whether to flip vertically. + """ + + flip_comb = [(False, False)] + if flip_horizontal: + flip_comb.append((True, False)) + if flip_vertical: + flip_comb.append((False, True)) + if flip_horizontal: + flip_comb.append((True, True)) + return flip_comb + + +def tensor_flip(x, flip): + """Flip tensor according directions""" + if flip[0]: + x = x[:, :, :, ::-1] + if flip[1]: + x = x[:, :, ::-1, :] + return x + + +def inference(model, + model_hard, + im, + ori_shape=None, + transforms=None, + is_slide=False, + stride=None, + crop_size=None): + """ + Inference for image. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + is_slide (bool): Whether to infer by sliding window. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. + If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned. 
+ """ + if not is_slide: + # logits = model(im)[0] + # logits_hard = model_hard(im)[0] + + logits = F.softmax(model(im)[0], axis=1) + logits_hard = F.softmax(model_hard(im)[0], axis=1) + + # logit_hard = logits.clone() + # for ii in range(logits.shape[0]): + # logit_hard[ii] = paddle.scatter(logit_hard[ii], paddle.to_tensor([3, 7, 12, 14, 15, 16, 18, 19, 20, 21]), logits_hard[ii][1:], overwrite=True, name=None) + logit = logits + logits_hard + # logit = logit_hard + + if ori_shape is not None: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + pred = reverse_transform(pred, ori_shape, transforms) + return pred + else: + return logit + + +def aug_inference(model, + model_hard, + im, + ori_shape, + transforms, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + Infer with augmentation. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + scales (float|tuple|list): Scales for resize. Default: 1. + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + is_slide (bool): Whether to infer by sliding wimdow. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: Prediction of image with shape (1, 1, h, w) is returned. + """ + if isinstance(scales, float): + scales = [scales] + elif not isinstance(scales, (tuple, list)): + raise TypeError( + '`scales` expects float/tuple/list type, but received {}'.format( + type(scales))) + final_logit = 0 + h_input, w_input = im.shape[-2], im.shape[-1] + flip_comb = flip_combination(flip_horizontal, flip_vertical) + for scale in scales: + h = int(h_input * scale + 0.5) + w = int(w_input * scale + 0.5) + im = F.interpolate(im, (h, w), mode='bilinear') + for flip in flip_comb: + im_flip = tensor_flip(im, flip) + logit = inference( + model, + model_hard, + im_flip, + is_slide=is_slide, + crop_size=crop_size, + stride=stride) + logit = tensor_flip(logit, flip) + logit = F.interpolate(logit, (h_input, w_input), mode='bilinear') + + # logit = F.softmax(logit, axis=1) + final_logit = final_logit + logit + + return final_logit diff --git a/contrib/AutoNUE/core/infer_generate_autolabel.py b/contrib/AutoNUE/core/infer_generate_autolabel.py new file mode 100755 index 0000000000..129f10501e --- /dev/null +++ b/contrib/AutoNUE/core/infer_generate_autolabel.py @@ -0,0 +1,267 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import collections.abc +from itertools import combinations + +import numpy as np +import paddle +import paddle.nn.functional as F + + +def get_reverse_list(ori_shape, transforms): + """ + get reverse list of transform. + + Args: + ori_shape (list): Origin shape of image. + transforms (list): List of transform. + + Returns: + list: List of tuple, there are two format: + ('resize', (h, w)) The image shape before resize, + ('padding', (h, w)) The image shape before padding. + """ + reverse_list = [] + h, w = ori_shape[0], ori_shape[1] + for op in transforms: + if op.__class__.__name__ in ['Resize', 'ResizeByLong']: + reverse_list.append(('resize', (h, w))) + h, w = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['Padding']: + reverse_list.append(('padding', (h, w))) + w, h = op.target_size[0], op.target_size[1] + if op.__class__.__name__ in ['LimitLong']: + long_edge = max(h, w) + short_edge = min(h, w) + if ((op.max_long is not None) and (long_edge > op.max_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.max_long + short_edge = int(round(short_edge * op.max_long / long_edge)) + elif ((op.min_long is not None) and (long_edge < op.min_long)): + reverse_list.append(('resize', (h, w))) + long_edge = op.min_long + short_edge = int(round(short_edge * op.min_long / long_edge)) + if h > w: + h = long_edge + w = short_edge + else: + w = long_edge + h = short_edge + return reverse_list + + +def reverse_transform(pred, ori_shape, transforms): + """recover pred to origin shape""" + reverse_list = get_reverse_list(ori_shape, transforms) + for item in reverse_list[::-1]: + if item[0] == 'resize': + h, w = item[1][0], item[1][1] + pred = F.interpolate(pred, (h, w), mode='nearest') + elif item[0] == 'padding': + h, w = item[1][0], item[1][1] + pred = pred[:, :, 0:h, 0:w] + else: + raise Exception("Unexpected info '{}' in im_info".format(item[0])) + return pred + + +def flip_combination(flip_horizontal=False, flip_vertical=False): + """ + Get flip combination. + + Args: + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + + Returns: + list: List of tuple. The first element of tuple is whether to flip horizontally, + and the second is whether to flip vertically. + """ + + flip_comb = [(False, False)] + if flip_horizontal: + flip_comb.append((True, False)) + if flip_vertical: + flip_comb.append((False, True)) + if flip_horizontal: + flip_comb.append((True, True)) + return flip_comb + + +def tensor_flip(x, flip): + """Flip tensor according directions""" + if flip[0]: + x = x[:, :, :, ::-1] + if flip[1]: + x = x[:, :, ::-1, :] + return x + + +def slide_inference(model, im, crop_size, stride): + """ + Infer by sliding window. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + crop_size (tuple|list). The size of sliding window, (w, h). + stride (tuple|list). The size of stride, (w, h). + + Return: + Tensor: The logit of input image. + """ + h_im, w_im = im.shape[-2:] + w_crop, h_crop = crop_size + w_stride, h_stride = stride + # calculate the crop nums + rows = np.int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1 + cols = np.int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1 + # prevent negative sliding rounds when imgs after scaling << crop_size + rows = 1 if h_im <= h_crop else rows + cols = 1 if w_im <= w_crop else cols + # TODO 'Tensor' object does not support item assignment. If support, use tensor to calculation. 
+ final_logit = None + count = np.zeros([1, 1, h_im, w_im]) + for r in range(rows): + for c in range(cols): + h1 = r * h_stride + w1 = c * w_stride + h2 = min(h1 + h_crop, h_im) + w2 = min(w1 + w_crop, w_im) + h1 = max(h2 - h_crop, 0) + w1 = max(w2 - w_crop, 0) + im_crop = im[:, :, h1:h2, w1:w2] + logits = model(im_crop) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}" + .format(type(logits))) + logit = logits[0].numpy() + if final_logit is None: + final_logit = np.zeros([1, logit.shape[1], h_im, w_im]) + final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1] + count[:, :, h1:h2, w1:w2] += 1 + if np.sum(count == 0) != 0: + raise RuntimeError( + 'There are pixel not predicted. It is possible that stride is greater than crop_size' + ) + final_logit = final_logit / count + final_logit = paddle.to_tensor(final_logit) + return final_logit + + +def inference(model, + im, + ori_shape=None, + transforms=None, + is_slide=False, + stride=None, + crop_size=None): + """ + Inference for image. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + is_slide (bool): Whether to infer by sliding window. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned. + If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned. + """ + if not is_slide: + logits = model(im) + if not isinstance(logits, collections.abc.Sequence): + raise TypeError( + "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}" + .format(type(logits))) + logit = logits[0] + else: + logit = slide_inference(model, im, crop_size=crop_size, stride=stride) + if ori_shape is not None: + pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32') + pred = reverse_transform(pred, ori_shape, transforms) + return pred + else: + return logit + + +def aug_inference(model, + im, + ori_shape, + transforms, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + Infer with augmentation. + + Args: + model (paddle.nn.Layer): model to get logits of image. + im (Tensor): the input image. + ori_shape (list): Origin shape of image. + transforms (list): Transforms for image. + scales (float|tuple|list): Scales for resize. Default: 1. + flip_horizontal (bool): Whether to flip horizontally. Default: False. + flip_vertical (bool): Whether to flip vertically. Default: False. + is_slide (bool): Whether to infer by sliding wimdow. Default: False. + crop_size (tuple|list). The size of sliding window, (w, h). It should be probided if is_slide is True. + stride (tuple|list). The size of stride, (w, h). It should be probided if is_slide is True. + + Returns: + Tensor: Prediction of image with shape (1, 1, h, w) is returned. 
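+
+    Note:
+        Per-scale and per-flip logits are softmax-normalized before being
+        accumulated, and the accumulated map is normalized once more. The
+        final prediction keeps only pixels whose maximum class probability is
+        at least 0.9; all other pixels are set to the ignore label 255, which
+        makes the output suitable for pseudo-label generation.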
+ """ + if isinstance(scales, float): + scales = [scales] + elif not isinstance(scales, (tuple, list)): + raise TypeError( + '`scales` expects float/tuple/list type, but received {}'.format( + type(scales))) + final_logit = 0 + h_input, w_input = im.shape[-2], im.shape[-1] + flip_comb = flip_combination(flip_horizontal, flip_vertical) + for scale in scales: + h = int(h_input * scale + 0.5) + w = int(w_input * scale + 0.5) + im = F.interpolate(im, (h, w), mode='bilinear') + for flip in flip_comb: + im_flip = tensor_flip(im, flip) + logit = inference( + model, + im_flip, + is_slide=is_slide, + crop_size=crop_size, + stride=stride) + logit = tensor_flip(logit, flip) + logit = F.interpolate(logit, (h_input, w_input), mode='bilinear') + + logit = F.softmax(logit, axis=1) + final_logit = final_logit + logit + + final_logit = F.softmax(final_logit, axis=1) + filte = paddle.max(final_logit, axis=1, keepdim=True).numpy() + pred = paddle.argmax( + final_logit, axis=1, keepdim=True, dtype='int32').numpy() + pred[filte < 0.9] = 255 + pred = paddle.to_tensor(pred) + pred = reverse_transform(pred, ori_shape, transforms) + return pred diff --git a/contrib/AutoNUE/core/predict_ensemble.py b/contrib/AutoNUE/core/predict_ensemble.py new file mode 100755 index 0000000000..8bf72b3a8e --- /dev/null +++ b/contrib/AutoNUE/core/predict_ensemble.py @@ -0,0 +1,151 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import cv2 +import numpy as np +import paddle + +from paddleseg import utils +import core.infer_ensemble as infer_ensemble +from paddleseg.utils import logger, progbar + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def partition_list(arr, m): + """split the list 'arr' into m pieces""" + n = int(math.ceil(len(arr) / float(m))) + return [arr[i:i + n] for i in range(0, len(arr), n)] + + +def predictEnsemble(model, + model_hard, + model_path, + model_path_hard, + transforms, + image_list, + image_dir=None, + save_dir='output', + aug_pred=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + predict and visualize the image_list. + + Args: + model (nn.Layer): Used to predict for input image. + model_path (str): The path of pretrained model. + transforms (transform.Compose): Preprocess for input image. + image_list (list): A list of image path to be predicted. + image_dir (str, optional): The root directory of the images predicted. Default: None. + save_dir (str, optional): The directory to save the visualized results. Default: 'output'. + aug_pred (bool, optional): Whether to use mulit-scales and flip augment for predition. Default: False. + scales (list|float, optional): Scales for augment. It is valid when `aug_pred` is True. Default: 1.0. + flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_pred` is True. Default: True. 
+ flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_pred` is True. Default: False. + is_slide (bool, optional): Whether to predict by sliding window. Default: False. + stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + + """ + utils.utils.load_entire_model(model, model_path) + model.eval() + utils.utils.load_entire_model(model_hard, model_path_hard) + model_hard.eval() + nranks = paddle.distributed.get_world_size() + local_rank = paddle.distributed.get_rank() + if nranks > 1: + img_lists = partition_list(image_list, nranks) + else: + img_lists = [image_list] + + added_saved_dir = os.path.join(save_dir, 'added_prediction') + pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction') + + logger.info("Start to predict...") + progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1) + with paddle.no_grad(): + for i, im_path in enumerate(img_lists[local_rank]): + im = cv2.imread(im_path) + ori_shape = im.shape[:2] + im, _ = transforms(im) + im = im[np.newaxis, ...] + im = paddle.to_tensor(im) + + if aug_pred: + pred = infer_ensemble.aug_inference( + model, + model_hard, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred = infer_ensemble.inference( + model, + model_hard, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + pred = paddle.squeeze(pred) + pred = pred.numpy().astype('uint8') + + # get the saved name + if image_dir is not None: + im_file = im_path.replace(image_dir, '') + else: + im_file = os.path.basename(im_path) + if im_file[0] == '/': + im_file = im_file[1:] + + # save added image + added_image = utils.visualize.visualize(im_path, pred, weight=0.6) + added_image_path = os.path.join(added_saved_dir, im_file) + mkdir(added_image_path) + cv2.imwrite(added_image_path, added_image) + + # save pseudo color prediction + pred_mask = utils.visualize.get_pseudo_color_map(pred) + pred_saved_path = os.path.join(pred_saved_dir, + im_file.rsplit(".")[0] + ".png") + mkdir(pred_saved_path) + pred_mask.save(pred_saved_path) + + # pred_im = utils.visualize(im_path, pred, weight=0.0) + # pred_saved_path = os.path.join(pred_saved_dir, im_file) + # mkdir(pred_saved_path) + # cv2.imwrite(pred_saved_path, pred_im) + + progbar_pred.update(i + 1) diff --git a/contrib/AutoNUE/core/predict_ensemble_three.py b/contrib/AutoNUE/core/predict_ensemble_three.py new file mode 100644 index 0000000000..dd5794bcdc --- /dev/null +++ b/contrib/AutoNUE/core/predict_ensemble_three.py @@ -0,0 +1,212 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import math + +import cv2 +import numpy as np +import paddle +import paddle.nn.functional as F + +from paddleseg import utils +import core.infer_ensemble_three as infer_ensemble +import core.infer_crop as infer_crop +from paddleseg.utils import logger, progbar + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def partition_list(arr, m): + """split the list 'arr' into m pieces""" + n = int(math.ceil(len(arr) / float(m))) + return [arr[i:i + n] for i in range(0, len(arr), n)] + + +def predictEnsembleThree(model, + model_1, + model_crop, + model_path, + model_path_1, + model_path_crop, + transforms, + transforms_crop, + image_list, + image_dir=None, + save_dir='output', + aug_pred=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + predict and visualize the image_list. + + Args: + model (nn.Layer): Used to predict for input image. + model_path (str): The path of pretrained model. + transforms (transform.Compose): Preprocess for input image. + image_list (list): A list of image path to be predicted. + image_dir (str, optional): The root directory of the images predicted. Default: None. + save_dir (str, optional): The directory to save the visualized results. Default: 'output'. + aug_pred (bool, optional): Whether to use mulit-scales and flip augment for predition. Default: False. + scales (list|float, optional): Scales for augment. It is valid when `aug_pred` is True. Default: 1.0. + flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_pred` is True. Default: True. + flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_pred` is True. Default: False. + is_slide (bool, optional): Whether to predict by sliding window. Default: False. + stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + + """ + utils.utils.load_entire_model(model, model_path) + model.eval() + utils.utils.load_entire_model(model_1, model_path_1) + model_1.eval() + utils.utils.load_entire_model(model_crop, model_path_crop) + model_crop.eval() + nranks = paddle.distributed.get_world_size() + local_rank = paddle.distributed.get_rank() + if nranks > 1: + img_lists = partition_list(image_list, nranks) + else: + img_lists = [image_list] + + added_saved_dir = os.path.join(save_dir, 'added_prediction') + pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction') + + logger.info("Start to predict...") + progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1) + with paddle.no_grad(): + for i, im_path in enumerate(img_lists[local_rank]): + im_origin = cv2.imread(im_path) + ori_shape = im_origin.shape[:2] + im, _ = transforms(im_origin) + im = im[np.newaxis, ...] + im = paddle.to_tensor(im) + + ims, _ = transforms_crop(im_origin) + im1 = ims[:, 540:540 + 720, 320:320 + 1280] + im2 = ims[:, 540:540 + 720, 960:960 + 1280] + im3 = ims[:, 540:540 + 720, 1600:1600 + 1280] + im1 = im1[np.newaxis, ...] + im1 = paddle.to_tensor(im1) + im2 = im2[np.newaxis, ...] + im2 = paddle.to_tensor(im2) + im3 = im3[np.newaxis, ...] 
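+            # The three 1280x720 crops start at x = 320, 960 and 1600 on the
+            # 720-row band beginning at y = 540, so adjacent crops overlap by
+            # 640 px; the overlapping predictions are averaged further below.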
+ im3 = paddle.to_tensor(im3) + ims_ = [im1, im2, im3] + + if aug_pred: + pred = infer_ensemble.aug_inference( + model, + model_1, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred = infer_ensemble.inference( + model, + model_1, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + preds = [] + for ii in range(3): + im_ = ims_[ii] + if aug_pred: + pred_crop = infer_crop.aug_inference( + model, + im_, + ori_shape=ori_shape, + transforms=transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred_crop = infer_crop.inference( + model, + im_, + ori_shape=ori_shape, + transforms=transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + preds.append(pred_crop) + + left_ensem = ( + preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2 + right_ensem = ( + preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2 + pred_ensem = paddle.concat([ + preds[0][:, :, :, 0:640], left_ensem, right_ensem, + preds[2][:, :, :, 640:1280] + ], + axis=3) + logit = F.interpolate(pred_ensem, (432, 768), mode='bilinear') + + pred_logit = pred.clone() + pred_logit[:, :, 324:756, 576:1344] = logit + pred = pred + pred_logit + pred = F.interpolate(pred, ori_shape, mode='bilinear') + pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32') + pred = paddle.squeeze(pred) + pred = pred.numpy().astype('uint8') + + # get the saved name + if image_dir is not None: + im_file = im_path.replace(image_dir, '') + else: + im_file = os.path.basename(im_path) + if im_file[0] == '/': + im_file = im_file[1:] + + # save added image + added_image = utils.visualize.visualize(im_path, pred, weight=0.6) + added_image_path = os.path.join(added_saved_dir, im_file) + mkdir(added_image_path) + cv2.imwrite(added_image_path, added_image) + + # save pseudo color prediction + pred_mask = utils.visualize.get_pseudo_color_map(pred) + pred_saved_path = os.path.join(pred_saved_dir, + im_file.rsplit(".")[0] + ".png") + mkdir(pred_saved_path) + pred_mask.save(pred_saved_path) + + # pred_im = utils.visualize(im_path, pred, weight=0.0) + # pred_saved_path = os.path.join(pred_saved_dir, im_file) + # mkdir(pred_saved_path) + # cv2.imwrite(pred_saved_path, pred_im) + + progbar_pred.update(i + 1) diff --git a/contrib/AutoNUE/core/predict_generate_autolabel.py b/contrib/AutoNUE/core/predict_generate_autolabel.py new file mode 100755 index 0000000000..a9e1ad15ef --- /dev/null +++ b/contrib/AutoNUE/core/predict_generate_autolabel.py @@ -0,0 +1,145 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
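As an aside on `predictEnsembleThree` above: the three per-crop predictions are stitched back together by averaging the 640-px overlaps before being pasted into the full-frame logit. A small NumPy sketch of just that blending step (toy arrays and shapes, not the actual model outputs):

```python
import numpy as np

# Three per-crop "probability maps", each 1280 px wide; adjacent crops
# overlap by 640 px, mirroring the crops taken at x = 320, 960 and 1600.
preds = [np.random.rand(1, 26, 720, 1280) for _ in range(3)]

left = (preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2
right = (preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2
stitched = np.concatenate(
    [preds[0][:, :, :, 0:640], left, right, preds[2][:, :, :, 640:1280]],
    axis=3)
print(stitched.shape)  # (1, 26, 720, 2560), covering x = 320..2880 of the band
```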
+ +import os +import math + +import cv2 +import numpy as np +import paddle + +from paddleseg import utils +from core import infer_generate_autolabel +from paddleseg.utils import logger, progbar + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def partition_list(arr, m): + """split the list 'arr' into m pieces""" + n = int(math.ceil(len(arr) / float(m))) + return [arr[i:i + n] for i in range(0, len(arr), n)] + + +def predictAutolabel(model, + model_path, + transforms, + image_list, + image_dir=None, + save_dir='output', + aug_pred=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None): + """ + predict and visualize the image_list. + + Args: + model (nn.Layer): Used to predict for input image. + model_path (str): The path of pretrained model. + transforms (transform.Compose): Preprocess for input image. + image_list (list): A list of image path to be predicted. + image_dir (str, optional): The root directory of the images predicted. Default: None. + save_dir (str, optional): The directory to save the visualized results. Default: 'output'. + aug_pred (bool, optional): Whether to use mulit-scales and flip augment for predition. Default: False. + scales (list|float, optional): Scales for augment. It is valid when `aug_pred` is True. Default: 1.0. + flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_pred` is True. Default: True. + flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_pred` is True. Default: False. + is_slide (bool, optional): Whether to predict by sliding window. Default: False. + stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + + """ + utils.utils.load_entire_model(model, model_path) + model.eval() + nranks = paddle.distributed.get_world_size() + local_rank = paddle.distributed.get_rank() + if nranks > 1: + img_lists = partition_list(image_list, nranks) + else: + img_lists = [image_list] + + added_saved_dir = os.path.join(save_dir, 'added_prediction') + pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction') + + logger.info("Start to predict...") + progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1) + with paddle.no_grad(): + for i, im_path in enumerate(img_lists[local_rank]): + im = cv2.imread(im_path) + ori_shape = im.shape[:2] + im, _ = transforms(im) + im = im[np.newaxis, ...] 
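+            # prepend a batch dimension: (C, H, W) -> (1, C, H, W) before
+            # converting the image to a paddle tensor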
+ im = paddle.to_tensor(im) + + if aug_pred: + pred = infer_generate_autolabel.aug_inference( + model, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred = infer_generate_autolabel.inference( + model, + im, + ori_shape=ori_shape, + transforms=transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + pred = paddle.squeeze(pred) + pred = pred.numpy().astype('uint8') + + # get the saved name + if image_dir is not None: + im_file = im_path.replace(image_dir, '') + else: + im_file = os.path.basename(im_path) + if im_file[0] == '/': + im_file = im_file[1:] + + # save added image + added_image = utils.visualize.visualize(im_path, pred, weight=0.6) + added_image_path = os.path.join(added_saved_dir, im_file) + mkdir(added_image_path) + cv2.imwrite(added_image_path, added_image) + + # save pseudo color prediction + pred_mask = utils.visualize.get_pseudo_color_map(pred) + pred_saved_path = os.path.join(pred_saved_dir, + im_file.rsplit(".")[0] + ".png") + mkdir(pred_saved_path) + pred_mask.save(pred_saved_path) + + # pred_im = utils.visualize(im_path, pred, weight=0.0) + # pred_saved_path = os.path.join(pred_saved_dir, im_file) + # mkdir(pred_saved_path) + # cv2.imwrite(pred_saved_path, pred_im) + + progbar_pred.update(i + 1) diff --git a/contrib/AutoNUE/core/val.py b/contrib/AutoNUE/core/val.py new file mode 100755 index 0000000000..cb78348c25 --- /dev/null +++ b/contrib/AutoNUE/core/val.py @@ -0,0 +1,175 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import time +import paddle +import paddle.nn.functional as F + +from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar +from core import infer + +np.set_printoptions(suppress=True) + + +def evaluate(model, + eval_dataset, + aug_eval=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + num_workers=0, + print_detail=True): + """ + Launch evalution. + + Args: + model(nn.Layer): A sementic segmentation model. + eval_dataset (paddle.io.Dataset): Used to read and process validation datasets. + aug_eval (bool, optional): Whether to use mulit-scales and flip augment for evaluation. Default: False. + scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0. + flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_eval` is True. Default: True. + flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_eval` is True. Default: False. + is_slide (bool, optional): Whether to evaluate by sliding window. Default: False. + stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height. 
+ It should be provided when `is_slide` is True. + crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + num_workers (int, optional): Num workers for data loader. Default: 0. + print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True. + + Returns: + float: The mIoU of validation datasets. + float: The accuracy of validation datasets. + """ + model.eval() + nranks = paddle.distributed.ParallelEnv().nranks + local_rank = paddle.distributed.ParallelEnv().local_rank + if nranks > 1: + # Initialize parallel environment if not done. + if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized( + ): + paddle.distributed.init_parallel_env() + batch_sampler = paddle.io.DistributedBatchSampler( + eval_dataset, batch_size=1, shuffle=False, drop_last=False) + loader = paddle.io.DataLoader( + eval_dataset, + batch_sampler=batch_sampler, + num_workers=num_workers, + return_list=True, + ) + + total_iters = len(loader) + intersect_area_all = 0 + pred_area_all = 0 + label_area_all = 0 + + if print_detail: + logger.info( + "Start evaluating (total_samples={}, total_iters={})...".format( + len(eval_dataset), total_iters)) + progbar_val = progbar.Progbar(target=total_iters, verbose=1) + reader_cost_averager = TimeAverager() + batch_cost_averager = TimeAverager() + batch_start = time.time() + with paddle.no_grad(): + for iter, data in enumerate(loader): + (im, label) = data + reader_cost_averager.record(time.time() - batch_start) + label = label.astype('int64') + + ori_shape = label.shape[-2:] + if aug_eval: + pred = infer.aug_inference( + model, + im, + ori_shape=ori_shape, + transforms=eval_dataset.transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred = infer.inference( + model, + im, + ori_shape=ori_shape, + transforms=eval_dataset.transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + + intersect_area, pred_area, label_area = metrics.calculate_area( + pred, + label, + eval_dataset.num_classes, + ignore_index=eval_dataset.ignore_index) + + # Gather from all ranks + if nranks > 1: + intersect_area_list = [] + pred_area_list = [] + label_area_list = [] + paddle.distributed.all_gather(intersect_area_list, + intersect_area) + paddle.distributed.all_gather(pred_area_list, pred_area) + paddle.distributed.all_gather(label_area_list, label_area) + + # Some image has been evaluated and should be eliminated in last iter + if (iter + 1) * nranks > len(eval_dataset): + valid = len(eval_dataset) - iter * nranks + intersect_area_list = intersect_area_list[:valid] + pred_area_list = pred_area_list[:valid] + label_area_list = label_area_list[:valid] + + for i in range(len(intersect_area_list)): + intersect_area_all = intersect_area_all + intersect_area_list[ + i] + pred_area_all = pred_area_all + pred_area_list[i] + label_area_all = label_area_all + label_area_list[i] + else: + intersect_area_all = intersect_area_all + intersect_area + pred_area_all = pred_area_all + pred_area + label_area_all = label_area_all + label_area + batch_cost_averager.record( + time.time() - batch_start, num_samples=len(label)) + batch_cost = batch_cost_averager.get_average() + reader_cost = reader_cost_averager.get_average() + + if local_rank == 0 and print_detail: + progbar_val.update(iter + 1, 
[('batch_cost', batch_cost), + ('reader cost', reader_cost)]) + reader_cost_averager.reset() + batch_cost_averager.reset() + batch_start = time.time() + + class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all, + label_area_all) + class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all) + kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all) + + if print_detail: + logger.info( + "[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".format( + len(eval_dataset), miou, acc, kappa)) + logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4))) + logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4))) + return miou, acc diff --git a/contrib/AutoNUE/core/val_crop.py b/contrib/AutoNUE/core/val_crop.py new file mode 100755 index 0000000000..9ed99e4e34 --- /dev/null +++ b/contrib/AutoNUE/core/val_crop.py @@ -0,0 +1,190 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import numpy as np +import time +import paddle +import paddle.nn.functional as F + +from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar +from core import infer_crop + +np.set_printoptions(suppress=True) + + +def evaluate(model, + eval_dataset, + aug_eval=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + num_workers=0, + print_detail=True): + """ + Launch evalution. + + Args: + model(nn.Layer): A sementic segmentation model. + eval_dataset (paddle.io.Dataset): Used to read and process validation datasets. + aug_eval (bool, optional): Whether to use mulit-scales and flip augment for evaluation. Default: False. + scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0. + flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_eval` is True. Default: True. + flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_eval` is True. Default: False. + is_slide (bool, optional): Whether to evaluate by sliding window. Default: False. + stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height. + It should be provided when `is_slide` is True. + num_workers (int, optional): Num workers for data loader. Default: 0. + print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True. + + Returns: + float: The mIoU of validation datasets. + float: The accuracy of validation datasets. + """ + model.eval() + nranks = paddle.distributed.ParallelEnv().nranks + local_rank = paddle.distributed.ParallelEnv().local_rank + if nranks > 1: + # Initialize parallel environment if not done. 
+ if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized( + ): + paddle.distributed.init_parallel_env() + batch_sampler = paddle.io.DistributedBatchSampler( + eval_dataset, batch_size=1, shuffle=False, drop_last=False) + loader = paddle.io.DataLoader( + eval_dataset, + batch_sampler=batch_sampler, + num_workers=num_workers, + return_list=True, + ) + + total_iters = len(loader) + intersect_area_all = 0 + pred_area_all = 0 + label_area_all = 0 + + if print_detail: + logger.info( + "Start evaluating (total_samples={}, total_iters={})...".format( + len(eval_dataset), total_iters)) + progbar_val = progbar.Progbar(target=total_iters, verbose=1) + reader_cost_averager = TimeAverager() + batch_cost_averager = TimeAverager() + batch_start = time.time() + with paddle.no_grad(): + for iter, data in enumerate(loader): + reader_cost_averager.record(time.time() - batch_start) + preds = [] + label = data[3].astype('int64') + for ii in range(3): + im = data[ii] + ori_shape = im.shape[-2:] + if aug_eval: + pred = infer_crop.aug_inference( + model, + im, + ori_shape=ori_shape, + transforms=eval_dataset.transforms.transforms, + scales=scales, + flip_horizontal=flip_horizontal, + flip_vertical=flip_vertical, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + else: + pred = infer_crop.inference( + model, + im, + ori_shape=ori_shape, + transforms=eval_dataset.transforms.transforms, + is_slide=is_slide, + stride=stride, + crop_size=crop_size) + preds.append(pred) + + left_ensem = ( + preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2 + right_ensem = ( + preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2 + pred_ensem = paddle.concat([ + preds[0][:, :, :, 0:640], left_ensem, right_ensem, + preds[2][:, :, :, 640:1280] + ], + axis=3) + pred = paddle.argmax( + pred_ensem, axis=1, keepdim=True, dtype='int32') + + intersect_area, pred_area, label_area = metrics.calculate_area( + pred, + label, + eval_dataset.num_classes, + ignore_index=eval_dataset.ignore_index) + + # Gather from all ranks + if nranks > 1: + intersect_area_list = [] + pred_area_list = [] + label_area_list = [] + paddle.distributed.all_gather(intersect_area_list, + intersect_area) + paddle.distributed.all_gather(pred_area_list, pred_area) + paddle.distributed.all_gather(label_area_list, label_area) + + # Some image has been evaluated and should be eliminated in last iter + if (iter + 1) * nranks > len(eval_dataset): + valid = len(eval_dataset) - iter * nranks + intersect_area_list = intersect_area_list[:valid] + pred_area_list = pred_area_list[:valid] + label_area_list = label_area_list[:valid] + + for i in range(len(intersect_area_list)): + intersect_area_all = intersect_area_all + intersect_area_list[ + i] + pred_area_all = pred_area_all + pred_area_list[i] + label_area_all = label_area_all + label_area_list[i] + else: + intersect_area_all = intersect_area_all + intersect_area + pred_area_all = pred_area_all + pred_area + label_area_all = label_area_all + label_area + + batch_cost_averager.record( + time.time() - batch_start, num_samples=len(label)) + batch_cost = batch_cost_averager.get_average() + reader_cost = reader_cost_averager.get_average() + + if local_rank == 0 and print_detail: + progbar_val.update(iter + 1, [('batch_cost', batch_cost), + ('reader cost', reader_cost)]) + reader_cost_averager.reset() + batch_cost_averager.reset() + batch_start = time.time() + + class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all, + label_area_all) + class_acc, acc = 
metrics.accuracy(intersect_area_all, pred_area_all) + kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all) + + if print_detail: + logger.info( + "[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".format( + len(eval_dataset), miou, acc, kappa)) + logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4))) + logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4))) + return miou, acc diff --git a/legacy/contrib/HumanSeg/datasets/__init__.py b/contrib/AutoNUE/datasets/__init__.py old mode 100644 new mode 100755 similarity index 75% rename from legacy/contrib/HumanSeg/datasets/__init__.py rename to contrib/AutoNUE/datasets/__init__.py index 77dae99526..8b5777ffa9 --- a/legacy/contrib/HumanSeg/datasets/__init__.py +++ b/contrib/AutoNUE/datasets/__init__.py @@ -1,5 +1,4 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,4 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .dataset import Dataset +from .auto_nue import AutoNue +from .auto_nue_autolabel import AutoNueAutolabel +from .auto_nue_crop import AutoNueCrop diff --git a/contrib/AutoNUE/datasets/auto_nue.py b/contrib/AutoNUE/datasets/auto_nue.py new file mode 100755 index 0000000000..6230e19cf6 --- /dev/null +++ b/contrib/AutoNUE/datasets/auto_nue.py @@ -0,0 +1,177 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +import random +import cv2 +import paddle +import numpy as np +from PIL import Image + +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + +# Random seed is set to ensure that after shuffling dataset per epoch during multi-gpu training, the data sequences of all gpus are consistent. +random.seed(100) + + +@manager.DATASETS.add_component +class AutoNue(paddle.io.Dataset): + """ + You need to to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5) + following the instructions of Segmentation Track. IDD_Dectection dataset also need for pseudo-labeling. + + And then, you need to organize data following the below structure. + + IDD_Segmentation + | + |--leftImg8bit + | |--train + | |--val + | |--test + | + |--gtFine + | |--train + | |--val + | |--test + + Args: + transforms (list): Transforms for image. + dataset_root (str): Cityscapes dataset directory. + mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'. + coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1 + add_val (bool, optional): Whether to add val set in training. 
Default: False + """ + + def __init__(self, + transforms, + dataset_root, + mode='train', + coarse_multiple=1, + add_val=False): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.file_list = list() + mode = mode.lower() + self.mode = mode + self.num_classes = 26 + self.ignore_index = 255 + self.coarse_multiple = coarse_multiple + + if mode not in ['train', 'val', 'test']: + raise ValueError( + "mode should be 'train', 'val' or 'test', but got {}.".format( + mode)) + + if self.transforms is None: + raise ValueError("`transforms` is necessary, but it is None.") + + img_dir = os.path.join(self.dataset_root, 'leftImg8bit') + label_dir = os.path.join(self.dataset_root, 'gtFine') + if self.dataset_root is None or not os.path.isdir( + self.dataset_root) or not os.path.isdir( + img_dir) or not os.path.isdir(label_dir): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." + ) + + label_files = sorted( + glob.glob( + os.path.join(label_dir, mode, '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*'))) + + self.file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + # for ii in range(len(self.file_list)): + # print(self.file_list[ii]) + # print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + if mode == 'train': + # whether to add val set in training + if add_val: + label_files = sorted( + glob.glob( + os.path.join(label_dir, 'val', '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob( + os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*'))) + val_file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + self.file_list.extend(val_file_list) + for ii in range(len(self.file_list)): + print(self.file_list[ii]) + print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + # use coarse dataset only in training + # img_dir = os.path.join('data/IDD_Detection/JPEGImages/all') + # label_dir = os.path.join('data/IDD_Detection/AutoLabel/pred_refine') + + # if self.dataset_root is None or not os.path.isdir( + # self.dataset_root) or not os.path.isdir( + # img_dir) or not os.path.isdir(label_dir): + # raise ValueError( + # "The coarse dataset is not Found or the folder structure is nonconfoumance." + # ) + + # coarse_label_files = sorted( + # glob.glob(os.path.join(label_dir, '*', '*'))) + # coarse_img_files = sorted( + # glob.glob(os.path.join(img_dir, '*', '*'))) + # if len(coarse_img_files) != len(coarse_label_files): + # raise ValueError( + # "The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset." + # .format(len(coarse_img_files), len(coarse_label_files))) + + # self.coarse_file_list = [[img_path, label_path] + # for img_path, label_path in zip( + # coarse_img_files, coarse_label_files)] + # random.shuffle(self.coarse_file_list) + + # self.total_num_files = int(self.num_files * (1 + coarse_multiple)) + + def __getitem__(self, idx): + if self.mode == 'test': + image_path, label_path = self.file_list[idx] + im, _ = self.transforms(im=image_path) + im = im[np.newaxis, ...] 
+ return im, image_path + elif self.mode == 'val': + image_path, label_path = self.file_list[idx] + im, _ = self.transforms(im=image_path) + label = np.asarray(Image.open(label_path)) + # label = cv2.resize(label, (1280, 720), interpolation=cv2.INTER_NEAREST) + label = label[np.newaxis, :, :] + return im, label + else: + if idx >= self.num_files: + image_path, label_path = self.coarse_file_list[idx - + self.num_files] + else: + image_path, label_path = self.file_list[idx] + + im, label = self.transforms(im=image_path, label=label_path) + return im, label + + def __len__(self): + return self.total_num_files diff --git a/contrib/AutoNUE/datasets/auto_nue_autolabel.py b/contrib/AutoNUE/datasets/auto_nue_autolabel.py new file mode 100755 index 0000000000..3c69ec81a1 --- /dev/null +++ b/contrib/AutoNUE/datasets/auto_nue_autolabel.py @@ -0,0 +1,181 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +import random + +import paddle +import numpy as np +from PIL import Image + +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + +# Random seed is set to ensure that after shuffling dataset per epoch during multi-gpu training, the data sequences of all gpus are consistent. +random.seed(100) + + +@manager.DATASETS.add_component +class AutoNueAutolabel(paddle.io.Dataset): + """ + You need to to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5) + following the instructions of Segmentation Track. IDD_Dectection dataset also need for pseudo-labeling. + + And then, you need to organize data following the below structure. + + IDD_Segmentation + | + |--leftImg8bit + | |--train + | |--val + | |--test + | + |--gtFine + | |--train + | |--val + | |--test + + Args: + transforms (list): Transforms for image. + dataset_root (str): Cityscapes dataset directory. + mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'. + coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1 + add_val (bool, optional): Whether to add val set in training. 
Default: False + """ + + def __init__(self, + transforms, + dataset_root, + mode='train', + coarse_multiple=1, + add_val=False): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.file_list = list() + mode = mode.lower() + self.mode = mode + self.num_classes = 26 + self.ignore_index = 255 + self.coarse_multiple = coarse_multiple + + if mode not in ['train', 'val', 'test']: + raise ValueError( + "mode should be 'train', 'val' or 'test', but got {}.".format( + mode)) + + if self.transforms is None: + raise ValueError("`transforms` is necessary, but it is None.") + + img_dir = os.path.join(self.dataset_root, 'leftImg8bit') + label_dir = os.path.join(self.dataset_root, 'gtFine') + if self.dataset_root is None or not os.path.isdir( + self.dataset_root) or not os.path.isdir( + img_dir) or not os.path.isdir(label_dir): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." + ) + + label_files = sorted( + glob.glob( + os.path.join(label_dir, mode, '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*'))) + + self.file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + # for ii in range(len(self.file_list)): + # print(self.file_list[ii]) + # print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + if mode == 'train': + # whether to add val set in training + if add_val: + label_files = sorted( + glob.glob( + os.path.join(label_dir, 'val', '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob( + os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*'))) + val_file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + self.file_list.extend(val_file_list) + for ii in range(len(self.file_list)): + print(self.file_list[ii]) + print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + # use coarse dataset only in training + img_dir = os.path.join('data/IDD_Detection/JPEGImages') + label_dir = os.path.join('data/IDD_Detection/pred_refine') + + if self.dataset_root is None or not os.path.isdir( + self.dataset_root) or not os.path.isdir( + img_dir) or not os.path.isdir(label_dir): + raise ValueError( + "The coarse dataset is not Found or the folder structure is nonconfoumance." + ) + + coarse_label_files = sorted( + glob.glob(os.path.join(label_dir, '*', '*'))) + coarse_img_files = sorted( + glob.glob(os.path.join(img_dir, '*', '*'))) + if len(coarse_img_files) != len(coarse_label_files): + raise ValueError( + "The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset." + .format(len(coarse_img_files), len(coarse_label_files))) + + self.coarse_file_list = [[img_path, label_path] + for img_path, label_path in zip( + coarse_img_files, coarse_label_files)] + random.shuffle(self.coarse_file_list) + + self.file_list = self.coarse_file_list + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + print(self.num_files) + + # self.total_num_files = int(self.num_files * (1 + coarse_multiple)) + + def __getitem__(self, idx): + if self.mode == 'test': + image_path, label_path = self.file_list[idx] + im, _ = self.transforms(im=image_path) + im = im[np.newaxis, ...] 
+ return im, image_path + elif self.mode == 'val': + image_path, label_path = self.file_list[idx] + im, _ = self.transforms(im=image_path) + label = np.asarray(Image.open(label_path)) + label = label[np.newaxis, :, :] + return im, label + else: + # if idx >= self.num_files: + # image_path, label_path = self.coarse_file_list[idx - + # self.num_files] + # else: + image_path, label_path = self.file_list[idx] + + im, label = self.transforms(im=image_path, label=label_path) + return im, label + + def __len__(self): + return self.total_num_files diff --git a/contrib/AutoNUE/datasets/auto_nue_crop.py b/contrib/AutoNUE/datasets/auto_nue_crop.py new file mode 100755 index 0000000000..620f67a60c --- /dev/null +++ b/contrib/AutoNUE/datasets/auto_nue_crop.py @@ -0,0 +1,162 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import glob +import random + +import paddle +import numpy as np +from PIL import Image + +from paddleseg.cvlibs import manager +from paddleseg.transforms import Compose + +# Random seed is set to ensure that after shuffling dataset per epoch during multi-gpu training, the data sequences of all gpus are consistent. +random.seed(100) + + +@manager.DATASETS.add_component +class AutoNueCrop(paddle.io.Dataset): + """ + You need to to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5) + following the instructions of Segmentation Track. IDD_Dectection dataset also need for pseudo-labeling. + + And then, you need to organize data following the below structure. + + IDD_Segmentation + | + |--leftImg8bit + | |--train + | |--val + | |--test + | + |--gtFine + | |--train + | |--val + | |--test + + Args: + transforms (list): Transforms for image. + dataset_root (str): Cityscapes dataset directory. + mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'. + coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1 + add_val (bool, optional): Whether to add val set in training. 
Default: False + """ + + def __init__(self, + transforms, + dataset_root, + mode='train', + coarse_multiple=1, + add_val=False): + self.dataset_root = dataset_root + self.transforms = Compose(transforms) + self.file_list = list() + mode = mode.lower() + self.mode = mode + self.num_classes = 26 + self.ignore_index = 255 + self.coarse_multiple = coarse_multiple + + if mode not in ['train', 'val', 'test']: + raise ValueError( + "mode should be 'train', 'val' or 'test', but got {}.".format( + mode)) + + if self.transforms is None: + raise ValueError("`transforms` is necessary, but it is None.") + + img_dir = os.path.join(self.dataset_root, 'leftImg8bit') + label_dir = os.path.join(self.dataset_root, 'gtFine') + if self.dataset_root is None or not os.path.isdir( + self.dataset_root) or not os.path.isdir( + img_dir) or not os.path.isdir(label_dir): + raise ValueError( + "The dataset is not Found or the folder structure is nonconfoumance." + ) + + label_files = sorted( + glob.glob( + os.path.join(label_dir, mode, '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*'))) + + self.file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + # for ii in range(len(self.file_list)): + # print(self.file_list[ii]) + # print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + if mode == 'train': + # whether to add val set in training + if add_val: + label_files = sorted( + glob.glob( + os.path.join(label_dir, 'val', '*', + '*_gtFine_labellevel3Ids.png'))) + img_files = sorted( + glob.glob( + os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*'))) + val_file_list = [[ + img_path, label_path + ] for img_path, label_path in zip(img_files, label_files)] + self.file_list.extend(val_file_list) + for ii in range(len(self.file_list)): + print(self.file_list[ii]) + print(len(self.file_list)) + self.num_files = len(self.file_list) + self.total_num_files = self.num_files + + def __getitem__(self, idx): + if self.mode == 'test': + image_path, label_path = self.file_list[idx] + im, _ = self.transforms(im=image_path) + (h, w) = im.shape[1:] + im1 = im[:, 540:540 + 720, 320:320 + 1280] + im2 = im[:, 540:540 + 720, 960:960 + 1280] + im3 = im[:, 540:540 + 720, 1600:1600 + 1280] + return im1, im2, im3, image_path + elif self.mode == 'val': + image_path, label_path = self.file_list[idx] + im, label = self.transforms(im=image_path, label=label_path) + (h, w) = im.shape[1:] + im1 = im[:, 540:540 + 720, 320:320 + 1280] + im2 = im[:, 540:540 + 720, 960:960 + 1280] + im3 = im[:, 540:540 + 720, 1600:1600 + 1280] + label = label[540:540 + 720, 320:1600 + 1280] + return im1, im2, im3, label + else: + if idx >= self.num_files: + image_path, label_path = self.coarse_file_list[idx - + self.num_files] + else: + image_path, label_path = self.file_list[idx] + + im, label = self.transforms(im=image_path, label=label_path) + (h, w) = im.shape[1:] + + start_w = np.linspace(320, 1600, 5).tolist() + np.random.shuffle(start_w) + start = int(start_w[0]) + crop_im = im[:, 540:540 + 720, start:(start + 1280)] + crop_label = label[540:540 + 720, start:(start + 1280)] + return crop_im, crop_label + + def __len__(self): + return self.total_num_files diff --git a/legacy/contrib/LaneNet/models/__init__.py b/contrib/AutoNUE/models/__init__.py old mode 100644 new mode 100755 similarity index 79% rename from legacy/contrib/LaneNet/models/__init__.py rename to contrib/AutoNUE/models/__init__.py 
index 6a2095cef4..dd69421f87 --- a/legacy/contrib/LaneNet/models/__init__.py +++ b/contrib/AutoNUE/models/__init__.py @@ -1,5 +1,4 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -import models.modeling -#import models.backbone +from .backbones.hrnet_nv import * +from .mscale_ocrnet import MscaleOCRNet diff --git a/contrib/AutoNUE/models/backbones/hrnet_nv.py b/contrib/AutoNUE/models/backbones/hrnet_nv.py new file mode 100755 index 0000000000..6ccf4817d5 --- /dev/null +++ b/contrib/AutoNUE/models/backbones/hrnet_nv.py @@ -0,0 +1,822 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers +from paddleseg.utils import utils + +__all__ = [ + "HRNet_W18_NV_Small_V1", "HRNet_W18_NV_Small_V2", "HRNet_W18_NV", + "HRNet_W30_NV", "HRNet_W32_NV", "HRNet_W40_NV", "HRNet_W44_NV", + "HRNet_W48_NV", "HRNet_W60_NV", "HRNet_W64_NV" +] + + +class HRNetNV(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + The difference from HRNet at paddleseg/models/backbones/hrnet.py is + 1. The padding parameter of convolution is different. + + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + + Args: + pretrained (str): The path of pretrained model. + stage1_num_modules (int): Number of modules for stage1. Default 1. + stage1_num_blocks (list): Number of blocks per module for stage1. Default [4]. + stage1_num_channels (list): Number of channels per branch for stage1. Default [64]. + stage2_num_modules (int): Number of modules for stage2. Default 1. + stage2_num_blocks (list): Number of blocks per module for stage2. Default [4, 4] + stage2_num_channels (list): Number of channels per branch for stage2. Default [18, 36]. + stage3_num_modules (int): Number of modules for stage3. Default 4. + stage3_num_blocks (list): Number of blocks per module for stage3. Default [4, 4, 4] + stage3_num_channels (list): Number of channels per branch for stage3. Default [18, 36, 72]. + stage4_num_modules (int): Number of modules for stage4. Default 3. + stage4_num_blocks (list): Number of blocks per module for stage4. Default [4, 4, 4, 4] + stage4_num_channels (list): Number of channels per branch for stage4. Default [18, 36, 72. 144]. + has_se (bool): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. 
It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + pretrained=None, + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + has_se=False, + align_corners=False): + super(HRNetNV, self).__init__() + self.pretrained = pretrained + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = layers.ConvBNReLU( + in_channels=3, + out_channels=64, + kernel_size=3, + stride=2, + padding=1, + bias_attr=False) + + self.conv_layer1_2 = layers.ConvBNReLU( + in_channels=64, + out_channels=64, + kernel_size=3, + stride=2, + padding=1, + bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], + out_channels=self.stage2_num_channels, + name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, + out_channels=self.stage3_num_channels, + name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, + out_channels=self.stage4_num_channels, + name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + self.init_weight() + + def forward(self, x): + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = F.interpolate( + st4[1], (x0_h, x0_w), + mode='bilinear', + align_corners=self.align_corners) + x2 = F.interpolate( + st4[2], (x0_h, x0_w), + mode='bilinear', + align_corners=self.align_corners) + x3 = F.interpolate( + st4[3], (x0_h, x0_w), + mode='bilinear', + align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + 
+ def init_weight(self): + for layer in self.sublayers(): + if isinstance(layer, nn.Conv2D): + param_init.normal_init(layer.weight, std=0.001) + elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)): + param_init.constant_init(layer.weight, value=1.0) + param_init.constant_init(layer.bias, value=0.0) + if self.pretrained is not None: + utils.load_pretrained_model(self, self.pretrained) + + +class Layer1(nn.Layer): + def __init__(self, + num_channels, + num_filters, + num_blocks, + has_se=False, + name=None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x): + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels, out_channels, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding=1, + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x): + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, + num_blocks, + in_channels, + out_channels, + has_se=False, + name=None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x): + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + has_se, + stride=1, + downsample=False, + name=None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=1, + padding=0, + bias_attr=False) + + self.conv2 = layers.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding=1, 
+ bias_attr=False) + + self.conv3 = layers.ConvBN( + in_channels=num_filters, + out_channels=num_filters * 4, + kernel_size=1, + padding=0, + bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBN( + in_channels=num_channels, + out_channels=num_filters * 4, + kernel_size=1, + bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, + num_filters=num_filters * 4, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, x): + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels, + num_filters, + stride=1, + has_se=False, + downsample=False, + name=None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding=1, + bias_attr=False) + self.conv2 = layers.ConvBN( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + padding=1, + bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=1, + padding=0, + bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters, + num_filters=num_filters, + reduction_ratio=16, + name=name + '_fc') + + def forward(self, x): + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels, num_filters, reduction_ratio, name=None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, + med_ch, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, + num_filters, + weight_attr=paddle.ParamAttr( + initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x): + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape( + excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels, + num_modules, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None, + align_corners=False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i 
+ 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x): + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels, + num_blocks, + num_filters, + has_se=False, + multi_scale_output=True, + name=None, + align_corners=False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, + in_channels=num_channels, + out_channels=num_filters, + has_se=has_se, + name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x): + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels, + out_channels, + multi_scale_output=True, + name=None, + align_corners=False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + layers.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding=0, + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + layers.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format( + name, i + 1, j + 1, k + 1), + layers.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, x): + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate( + y, + residual_shape, + mode='bilinear', + align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs + + +@manager.BACKBONES.add_component +def HRNet_W18_NV_Small_V1(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[1], + stage1_num_channels=[32], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[16, 32], + stage3_num_modules=1, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[16, 32, 64], + stage4_num_modules=1, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[16, 32, 64, 128], + **kwargs) + 
return model + + +@manager.BACKBONES.add_component +def HRNet_W18_NV_Small_V2(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[2], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[2, 2], + stage2_num_channels=[18, 36], + stage3_num_modules=3, + stage3_num_blocks=[2, 2, 2], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=2, + stage4_num_blocks=[2, 2, 2, 2], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W18_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[18, 36], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[18, 36, 72], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[18, 36, 72, 144], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W30_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[30, 60], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[30, 60, 120], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[30, 60, 120, 240], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W32_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[32, 64], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[32, 64, 128], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[32, 64, 128, 256], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W40_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[40, 80], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[40, 80, 160], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[40, 80, 160, 320], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W44_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[44, 88], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[44, 88, 176], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[44, 88, 176, 352], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W48_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[48, 96], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[48, 96, 192], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[48, 96, 192, 384], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W60_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[60, 120], + stage3_num_modules=4, + 
stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[60, 120, 240], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[60, 120, 240, 480], + **kwargs) + return model + + +@manager.BACKBONES.add_component +def HRNet_W64_NV(**kwargs): + model = HRNetNV( + stage1_num_modules=1, + stage1_num_blocks=[4], + stage1_num_channels=[64], + stage2_num_modules=1, + stage2_num_blocks=[4, 4], + stage2_num_channels=[64, 128], + stage3_num_modules=4, + stage3_num_blocks=[4, 4, 4], + stage3_num_channels=[64, 128, 256], + stage4_num_modules=3, + stage4_num_blocks=[4, 4, 4, 4], + stage4_num_channels=[64, 128, 256, 512], + **kwargs) + return model diff --git a/contrib/AutoNUE/models/mscale_ocrnet.py b/contrib/AutoNUE/models/mscale_ocrnet.py new file mode 100755 index 0000000000..8b72e3ba02 --- /dev/null +++ b/contrib/AutoNUE/models/mscale_ocrnet.py @@ -0,0 +1,278 @@ +import math + +import paddle +import paddle.nn as nn +from paddleseg.cvlibs import manager, param_init +from paddleseg.utils import utils +from paddleseg.models import layers +from .ocrnet_nv import OCRNetNV + + +@manager.MODELS.add_component +class MscaleOCRNet(nn.Layer): + def __init__(self, + num_classes, + backbone, + backbone_indices, + n_scales=[0.5, 1.0, 2.0], + ocr_mid_channels=512, + ocr_key_channels=256, + align_corners=False, + pretrained=None): + super().__init__() + self.ocrnet = OCRNetNV( + num_classes, + backbone, + backbone_indices, + ocr_mid_channels=ocr_mid_channels, + ocr_key_channels=ocr_key_channels, + align_corners=align_corners, + ms_attention=True) + self.scale_attn = AttenHead(in_ch=ocr_mid_channels, out_ch=1) + + self.n_scales = n_scales + self.pretrained = pretrained + self.align_corners = align_corners + + if self.pretrained is not None: + utils.load_pretrained_model(self, self.pretrained) + # backbone.init_weight() + + def forward(self, x): + if self.training: + return self.one_scale_forward(x) + else: + return self.nscale_forward(x, self.n_scales) + + def one_scale_forward(self, x): + + x_size = x.shape[2:] + cls_out, aux_out, _ = self.ocrnet(x) + + cls_out = nn.functional.interpolate( + cls_out, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + aux_out = nn.functional.interpolate( + aux_out, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + + output = [cls_out, aux_out] + output.extend(output) + return output + + def one_scale_val(self, x): + + x_size = x.shape[2:] + cls_out, aux_out, _ = self.ocrnet(x) + + cls_out = nn.functional.interpolate( + cls_out, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + + output = [cls_out] + return output + + def two_scale_forward(self, x_1x): + """ + Do we supervised both aux outputs, lo and high scale? + Should attention be used to combine the aux output? + Normally we only supervise the combined 1x output + + If we use attention to combine the aux outputs, then + we can use normal weighting for aux vs. 
cls outputs + """ + x_lo = nn.functional.interpolate( + x_1x, + scale_factor=0.5, + align_corners=self.align_corners, + mode='bilinear') + lo_outs = self.single_scale_forward(x_lo) + + pred_05x = lo_outs['cls_out'] + p_lo = pred_05x + aux_lo = lo_outs['aux_out'] + logit_attn = lo_outs['logit_attn'] + + hi_outs = self.single_scale_forward(x_1x) + pred_10x = hi_outs['cls_out'] + p_1x = pred_10x + aux_1x = hi_outs['aux_out'] + + p_lo = p_lo * logit_attn + aux_lo = aux_lo * logit_attn + p_lo = scale_as(p_lo, p_1x) + aux_lo = scale_as(aux_lo, p_1x) + + logit_attn = scale_as(logit_attn, p_1x) + + # combine lo and hi predictions with attention + joint_pred = p_lo + p_1x * (1 - logit_attn) + joint_aux = aux_lo + aux_1x * (1 - logit_attn) + + output = [joint_pred, joint_aux] + + # Optionally, apply supervision to the multi-scale predictions + # directly. + scaled_pred_05x = scale_as(pred_05x, p_1x) + output.extend([scaled_pred_05x, pred_10x]) + output.extend(output) + return output + + def two_scale_forward_high(self, x_1x): + """ + Do we supervised both aux outputs, lo and high scale? + Should attention be used to combine the aux output? + Normally we only supervise the combined 1x output + + If we use attention to combine the aux outputs, then + we can use normal weighting for aux vs. cls outputs + """ + x_hi = nn.functional.interpolate( + x_1x, + scale_factor=1.5, + align_corners=self.align_corners, + mode='bilinear') + + lo_outs = self.single_scale_forward(x_1x) + pred_10x = lo_outs['cls_out'] + p_lo = pred_10x + aux_lo = lo_outs['aux_out'] + logit_attn = lo_outs['logit_attn'] + + hi_outs = self.single_scale_forward(x_hi) + pred_15x = hi_outs['cls_out'] + p_hi = pred_15x + aux_hi = hi_outs['aux_out'] + + p_lo = p_lo * logit_attn + aux_lo = aux_lo * logit_attn + p_hi = scale_as(p_hi, p_lo) + aux_hi = scale_as(aux_hi, aux_lo) + + # combine lo and hi predictions with attention + joint_pred = p_lo + p_hi * (1 - logit_attn) + joint_aux = aux_lo + aux_hi * (1 - logit_attn) + + output = [joint_pred, joint_aux] + + # Optionally, apply supervision to the multi-scale predictions + # directly. + scaled_pred_15x = scale_as(pred_15x, p_lo) + output.extend(output) + return output + + def nscale_forward(self, x_1x, scales): + """ + Hierarchical attention, primarily used for getting best inference + results. + + We use attention at multiple scales, giving priority to the lower + resolutions. For example, if we have 4 scales {0.5, 1.0, 1.5, 2.0}, + then evaluation is done as follows: + + p_joint = attn_1.5 * p_1.5 + (1 - attn_1.5) * down(p_2.0) + p_joint = attn_1.0 * p_1.0 + (1 - attn_1.0) * down(p_joint) + p_joint = up(attn_0.5 * p_0.5) * (1 - up(attn_0.5)) * p_joint + + The target scale is always 1.0, and 1.0 is expected to be part of the + list of scales. When predictions are done at greater than 1.0 scale, + the predictions are downsampled before combining with the next lower + scale. 
+ + x_1x: + scales - a list of scales to evaluate + x_1x - dict containing 'images', the x_1x, and 'gts', the ground + truth mask + + Output: + If training, return loss, else return prediction + attention + """ + assert 1.0 in scales, 'expected 1.0 to be the target scale' + # Lower resolution provides attention for higher rez predictions, + # so we evaluate in order: high to low + scales = sorted(scales, reverse=True) + + pred = None + + for s in scales: + x = nn.functional.interpolate( + x_1x, + scale_factor=s, + align_corners=self.align_corners, + mode='bilinear') + outs = self.single_scale_forward(x) + + cls_out = outs['cls_out'] + attn_out = outs['logit_attn'] + + if pred is None: + pred = cls_out + elif s >= 1.0: + # downscale previous + pred = scale_as(pred, cls_out, self.align_corners) + pred = cls_out * attn_out + pred * (1 - attn_out) + else: + # s < 1.0: upscale current + cls_out = cls_out * attn_out + + cls_out = scale_as(cls_out, pred, self.align_corners) + attn_out = scale_as(attn_out, pred, self.align_corners) + + pred = cls_out + pred * (1 - attn_out) + + return [pred] + + def single_scale_forward(self, x): + x_size = x.shape[2:] + cls_out, aux_out, ocr_mid_feats = self.ocrnet(x) + attn = self.scale_attn(ocr_mid_feats) + + cls_out = nn.functional.interpolate( + cls_out, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + aux_out = nn.functional.interpolate( + aux_out, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + attn = nn.functional.interpolate( + attn, + size=x_size, + mode='bilinear', + align_corners=self.align_corners) + + return {'cls_out': cls_out, 'aux_out': aux_out, 'logit_attn': attn} + + +class AttenHead(nn.Layer): + def __init__(self, in_ch, out_ch): + super(AttenHead, self).__init__() + # bottleneck channels for seg and attn heads + bot_ch = 256 + + self.atten_head = nn.Sequential( + layers.ConvBNReLU(in_ch, bot_ch, 3, padding=1, bias_attr=False), + layers.ConvBNReLU(bot_ch, bot_ch, 3, padding=1, bias_attr=False), + nn.Conv2D(bot_ch, out_ch, kernel_size=(1, 1), bias_attr=False), + nn.Sigmoid()) + + def forward(self, x): + return self.atten_head(x) + + +def scale_as(x, y, align_corners=False): + ''' + scale x to the same size as y + ''' + y_size = y.shape[2], y.shape[3] + x_scaled = nn.functional.interpolate( + x, size=y_size, mode='bilinear', align_corners=align_corners) + return x_scaled diff --git a/contrib/AutoNUE/models/ocrnet_nv.py b/contrib/AutoNUE/models/ocrnet_nv.py new file mode 100755 index 0000000000..f6e750a9ea --- /dev/null +++ b/contrib/AutoNUE/models/ocrnet_nv.py @@ -0,0 +1,272 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +from paddleseg.cvlibs import manager, param_init +from paddleseg.models import layers + + +@manager.MODELS.add_component +class OCRNetNV(nn.Layer): + """ + The OCRNet implementation based on PaddlePaddle. 
+ The differences from OCRNet at OCRNet at paddleseg/models/ocrnet.py are + + 1. The convolution bias is set to False + + 2. droput_ rate in SpatialOCRModule is 0.05 + + 3. OCRHead will return `ocr`. + + 4. Will not Logit_ List size to the original size, when MS_ attention=True + + The original article refers to + Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation" + (https://arxiv.org/pdf/1909.11065.pdf) + + Args: + num_classes (int): The unique number of target classes. + backbone (Paddle.nn.Layer): Backbone network. + backbone_indices (tuple): A tuple indicates the indices of output of backbone. + It can be either one or two values, if two values, the first index will be taken as + a deep-supervision feature in auxiliary layer; the second one will be taken as + input of pixel representation. If one value, it is taken by both above. + ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512. + ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + num_classes, + backbone, + backbone_indices, + ocr_mid_channels=512, + ocr_key_channels=256, + align_corners=False, + ms_attention=False): + super().__init__() + + self.backbone = backbone + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = OCRHead( + num_classes=num_classes, + in_channels=in_channels, + ocr_mid_channels=ocr_mid_channels, + ocr_key_channels=ocr_key_channels, + ms_attention=ms_attention) + + self.align_corners = align_corners + self.ms_attention = ms_attention + + def forward(self, x): + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + if not self.ms_attention: + logit_list = [ + F.interpolate( + logit, + x.shape[2:], + mode='bilinear', + align_corners=self.align_corners) for logit in logit_list + ] + return logit_list + + +class OCRHead(nn.Layer): + """ + The Object contextual representation head. + + Args: + num_classes(int): The unique number of target classes. + in_channels(tuple): The number of input channels. + ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512. + ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 
+ """ + + def __init__(self, + num_classes, + in_channels, + ocr_mid_channels=512, + ocr_key_channels=256, + ms_attention=False): + super().__init__() + + self.num_classes = num_classes + self.ms_attention = ms_attention + self.spatial_gather = SpatialGatherBlock() + self.spatial_ocr = SpatialOCRModule( + ocr_mid_channels, + ocr_key_channels, + ocr_mid_channels, + dropout_rate=0.05) + + self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1] + + self.conv3x3_ocr = layers.ConvBNReLU( + in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1) + self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1) + self.aux_head = nn.Sequential( + layers.ConvBNReLU(in_channels[self.indices[0]], + in_channels[self.indices[0]], 1), + nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1)) + + self.init_weight() + + def forward(self, feat_list): + feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[ + self.indices[1]] + + soft_regions = self.aux_head(feat_shallow) + pixels = self.conv3x3_ocr(feat_deep) + + object_regions = self.spatial_gather(pixels, soft_regions) + ocr = self.spatial_ocr(pixels, object_regions) + + logit = self.cls_head(ocr) + if self.ms_attention: + return [logit, soft_regions, ocr] + return [logit, soft_regions] + + def init_weight(self): + """Initialize the parameters of model parts.""" + for sublayer in self.sublayers(): + if isinstance(sublayer, nn.Conv2D): + param_init.normal_init(sublayer.weight, std=0.001) + elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)): + param_init.constant_init(sublayer.weight, value=1.0) + param_init.constant_init(sublayer.bias, value=0.0) + + +class SpatialGatherBlock(nn.Layer): + """Aggregation layer to compute the pixel-region representation.""" + + def forward(self, pixels, regions): + n, c, h, w = pixels.shape + _, k, _, _ = regions.shape + + # pixels: from (n, c, h, w) to (n, h*w, c) + pixels = paddle.reshape(pixels, (n, c, h * w)) + pixels = paddle.transpose(pixels, (0, 2, 1)) + + # regions: from (n, k, h, w) to (n, k, h*w) + regions = paddle.reshape(regions, (n, k, h * w)) + regions = F.softmax(regions, axis=2) + + # feats: from (n, k, c) to (n, c, k, 1) + feats = paddle.bmm(regions, pixels) + feats = paddle.transpose(feats, (0, 2, 1)) + feats = paddle.unsqueeze(feats, axis=-1) + + return feats + + +class SpatialOCRModule(nn.Layer): + """Aggregate the global object representation to update the representation for each pixel.""" + + def __init__(self, + in_channels, + key_channels, + out_channels, + dropout_rate=0.1): + super().__init__() + + self.attention_block = ObjectAttentionBlock(in_channels, key_channels) + self.conv1x1 = nn.Sequential( + layers.ConvBNReLU( + 2 * in_channels, out_channels, 1, bias_attr=False), + nn.Dropout2D(dropout_rate)) + + def forward(self, pixels, regions): + context = self.attention_block(pixels, regions) + feats = paddle.concat([context, pixels], axis=1) + feats = self.conv1x1(feats) + + return feats + + +class ObjectAttentionBlock(nn.Layer): + """A self-attention module.""" + + def __init__(self, in_channels, key_channels): + super().__init__() + + self.in_channels = in_channels + self.key_channels = key_channels + + self.f_pixel = nn.Sequential( + layers.ConvBNReLU(in_channels, key_channels, 1, bias_attr=False), + layers.ConvBNReLU(key_channels, key_channels, 1, bias_attr=False)) + + self.f_object = nn.Sequential( + layers.ConvBNReLU(in_channels, key_channels, 1, bias_attr=False), + layers.ConvBNReLU(key_channels, key_channels, 1, bias_attr=False)) + + self.f_down = 
layers.ConvBNReLU( + in_channels, key_channels, 1, bias_attr=False) + + self.f_up = layers.ConvBNReLU( + key_channels, in_channels, 1, bias_attr=False) + + def forward(self, x, proxy): + n, _, h, w = x.shape + + # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) + query = self.f_pixel(x) + query = paddle.reshape(query, (n, self.key_channels, -1)) + query = paddle.transpose(query, (0, 2, 1)) + + # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) + key = self.f_object(proxy) + key = paddle.reshape(key, (n, self.key_channels, -1)) + + # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) + value = self.f_down(proxy) + value = paddle.reshape(value, (n, self.key_channels, -1)) + value = paddle.transpose(value, (0, 2, 1)) + + # sim_map (n, h1*w1, h2*w2) + sim_map = paddle.bmm(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) + context = paddle.bmm(sim_map, value) + context = paddle.transpose(context, (0, 2, 1)) + context = paddle.reshape(context, (n, self.key_channels, h, w)) + context = self.f_up(context) + + return context + + +class ProjectionHead(nn.Layer): + def __init__(self, dim_in, proj_dim=256, proj='convmlp'): + super(ProjectionHead, self).__init__() + + if proj == 'linear': + self.proj = nn.Conv2d(dim_in, proj_dim, kernel_size=1) + elif proj == 'convmlp': + self.proj = nn.Sequential( + nn.Conv2d(dim_in, dim_in, kernel_size=1), + nn.SyncBatchNorm(dim_in), nn.ReLU(), + nn.Conv2d(dim_in, proj_dim, kernel_size=1)) + + def forward(self, x): + return F.normalize(self.proj(x), p=2, dim=1) diff --git a/contrib/AutoNUE/predict.py b/contrib/AutoNUE/predict.py new file mode 100755 index 0000000000..94bc0e0c03 --- /dev/null +++ b/contrib/AutoNUE/predict.py @@ -0,0 +1,175 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
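Stepping back to the attention modules defined just above in `ocrnet_nv.py`: `SpatialGatherBlock` pools the OCR pixel features into one vector per soft class region, and `ObjectAttentionBlock` then lets every pixel attend over those K region vectors with a dot product scaled by `key_channels ** -0.5`. A standalone shape walk-through of the gather step, using arbitrary sizes (a sketch, not part of the diff):

```python
import paddle
import paddle.nn.functional as F

n, c, k, h, w = 2, 512, 19, 32, 64
pixels = paddle.randn([n, c, h, w])    # OCR mid features from conv3x3_ocr
regions = paddle.randn([n, k, h, w])   # coarse per-class logits from aux_head

# SpatialGatherBlock: soft-region-weighted average of pixel features.
p = paddle.transpose(paddle.reshape(pixels, (n, c, h * w)), (0, 2, 1))  # (n, h*w, c)
r = F.softmax(paddle.reshape(regions, (n, k, h * w)), axis=2)           # (n, k, h*w)
obj = paddle.bmm(r, p)                                                  # (n, k, c)
obj = paddle.unsqueeze(paddle.transpose(obj, (0, 2, 1)), axis=-1)       # (n, c, k, 1)

# These "object region" vectors are what SpatialOCRModule attends over per pixel.
print(obj.shape)  # [2, 512, 19, 1]
```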
+ +import argparse +import os + +import paddle + +from paddleseg.cvlibs import manager, Config +from paddleseg.utils import get_sys_env, logger +from paddleseg.core import predict +import datasets, models + + +def parse_args(): + parser = argparse.ArgumentParser(description='Model prediction') + + # params of prediction + parser.add_argument( + "--config", dest="cfg", help="The config file.", default=None, type=str) + parser.add_argument( + '--model_path', + dest='model_path', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + '--image_path', + dest='image_path', + help= + 'The path of image, it can be a file or a directory including images', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the predicted results', + type=str, + default='./output/result') + + # augment for prediction + parser.add_argument( + '--aug_pred', + dest='aug_pred', + help='Whether to use mulit-scales and flip augment for prediction', + action='store_true') + parser.add_argument( + '--scales', + dest='scales', + nargs='+', + help='Scales for augment', + type=float, + default=1.0) + parser.add_argument( + '--flip_horizontal', + dest='flip_horizontal', + help='Whether to use flip horizontally augment', + action='store_true') + parser.add_argument( + '--flip_vertical', + dest='flip_vertical', + help='Whether to use flip vertically augment', + action='store_true') + + # sliding window prediction + parser.add_argument( + '--is_slide', + dest='is_slide', + help='Whether to prediction by sliding window', + action='store_true') + parser.add_argument( + '--crop_size', + dest='crop_size', + nargs=2, + help= + 'The crop size of sliding window, the first is width and the second is height.', + type=int, + default=None) + parser.add_argument( + '--stride', + dest='stride', + nargs=2, + help= + 'The stride of sliding window, the first is width and the second is height.', + type=int, + default=None) + + return parser.parse_args() + + +def get_image_list(image_path): + """Get image list""" + valid_suffix = [ + '.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png' + ] + image_list = [] + image_dir = None + if os.path.isfile(image_path): + if os.path.splitext(image_path)[-1] in valid_suffix: + image_list.append(image_path) + elif os.path.isdir(image_path): + image_dir = image_path + for root, dirs, files in os.walk(image_path): + for f in files: + if '.ipynb_checkpoints' in root: + continue + if os.path.splitext(f)[-1] in valid_suffix: + image_list.append(os.path.join(root, f)) + else: + raise FileNotFoundError( + '`--image_path` is not found. it should be an image file or a directory including images' + ) + + if len(image_list) == 0: + raise RuntimeError('There are not image file in `--image_path`') + + return image_list, image_dir + + +def main(args): + env_info = get_sys_env() + place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[ + 'GPUs used'] else 'cpu' + + paddle.set_device(place) + if not args.cfg: + raise RuntimeError('No configuration file specified.') + + cfg = Config(args.cfg) + val_dataset = cfg.val_dataset + if not val_dataset: + raise RuntimeError( + 'The verification dataset is not specified in the configuration file.' 
+ ) + + msg = '\n---------------Config Information---------------\n' + msg += str(cfg) + msg += '------------------------------------------------' + logger.info(msg) + + model = cfg.model + transforms = val_dataset.transforms + image_list, image_dir = get_image_list(args.image_path) + logger.info('Number of predict images = {}'.format(len(image_list))) + predict( + model, + model_path=args.model_path, + transforms=transforms, + image_list=image_list, + image_dir=image_dir, + save_dir=args.save_dir, + aug_pred=args.aug_pred, + scales=args.scales, + flip_horizontal=args.flip_horizontal, + flip_vertical=args.flip_vertical, + is_slide=args.is_slide, + crop_size=args.crop_size, + stride=args.stride, + ) + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/contrib/AutoNUE/predict_ensemble.py b/contrib/AutoNUE/predict_ensemble.py new file mode 100755 index 0000000000..d9d901d4cc --- /dev/null +++ b/contrib/AutoNUE/predict_ensemble.py @@ -0,0 +1,192 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +import paddle + +from paddleseg.cvlibs import manager, Config +from paddleseg.utils import get_sys_env, logger +from core import predictEnsemble +import datasets, models + + +def parse_args(): + parser = argparse.ArgumentParser(description='Model prediction') + + # params of prediction + parser.add_argument( + "--config", dest="cfg", help="The config file.", default=None, type=str) + parser.add_argument( + '--model_path', + dest='model_path', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + "--config_hard", + dest="cfg_hard", + help="The config file.", + default=None, + type=str) + parser.add_argument( + '--model_path_hard', + dest='model_path_hard', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + '--image_path', + dest='image_path', + help= + 'The path of image, it can be a file or a directory including images', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the predicted results', + type=str, + default='./output/result') + + # augment for prediction + parser.add_argument( + '--aug_pred', + dest='aug_pred', + help='Whether to use mulit-scales and flip augment for prediction', + action='store_true') + parser.add_argument( + '--scales', + dest='scales', + nargs='+', + help='Scales for augment', + type=float, + default=1.0) + parser.add_argument( + '--flip_horizontal', + dest='flip_horizontal', + help='Whether to use flip horizontally augment', + action='store_true') + parser.add_argument( + '--flip_vertical', + dest='flip_vertical', + help='Whether to use flip vertically augment', + action='store_true') + + # sliding window prediction + parser.add_argument( + '--is_slide', + dest='is_slide', + help='Whether to prediction by sliding window', + action='store_true') + parser.add_argument( + '--crop_size', + 
dest='crop_size', + nargs=2, + help= + 'The crop size of sliding window, the first is width and the second is height.', + type=int, + default=None) + parser.add_argument( + '--stride', + dest='stride', + nargs=2, + help= + 'The stride of sliding window, the first is width and the second is height.', + type=int, + default=None) + + return parser.parse_args() + + +def get_image_list(image_path): + """Get image list""" + valid_suffix = [ + '.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png' + ] + image_list = [] + image_dir = None + if os.path.isfile(image_path): + if os.path.splitext(image_path)[-1] in valid_suffix: + image_list.append(image_path) + elif os.path.isdir(image_path): + image_dir = image_path + for root, dirs, files in os.walk(image_path): + for f in files: + if '.ipynb_checkpoints' in root: + continue + if os.path.splitext(f)[-1] in valid_suffix: + image_list.append(os.path.join(root, f)) + else: + raise FileNotFoundError( + '`--image_path` is not found. it should be an image file or a directory including images' + ) + + if len(image_list) == 0: + raise RuntimeError('There are not image file in `--image_path`') + + return image_list, image_dir + + +def main(args): + env_info = get_sys_env() + place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[ + 'GPUs used'] else 'cpu' + + paddle.set_device(place) + if not args.cfg: + raise RuntimeError('No configuration file specified.') + + cfg = Config(args.cfg) + val_dataset = cfg.val_dataset + cfg_hard = Config(args.cfg_hard) + + if not val_dataset: + raise RuntimeError( + 'The verification dataset is not specified in the configuration file.' + ) + + msg = '\n---------------Config Information---------------\n' + msg += str(cfg) + msg += '------------------------------------------------' + logger.info(msg) + + model = cfg.model + model_hard = cfg_hard.model + transforms = val_dataset.transforms + image_list, image_dir = get_image_list(args.image_path) + logger.info('Number of predict images = {}'.format(len(image_list))) + predictEnsemble( + model, + model_hard, + model_path=args.model_path, + model_path_hard=args.model_path_hard, + transforms=transforms, + image_list=image_list, + image_dir=image_dir, + save_dir=args.save_dir, + aug_pred=args.aug_pred, + scales=args.scales, + flip_horizontal=args.flip_horizontal, + flip_vertical=args.flip_vertical, + is_slide=args.is_slide, + crop_size=args.crop_size, + stride=args.stride, + ) + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/contrib/AutoNUE/predict_ensemble_three.py b/contrib/AutoNUE/predict_ensemble_three.py new file mode 100644 index 0000000000..49bc7e5a15 --- /dev/null +++ b/contrib/AutoNUE/predict_ensemble_three.py @@ -0,0 +1,211 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
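All of these prediction entry points duplicate the same `get_image_list` helper: it accepts either a single image file or a directory tree, skips anything under `.ipynb_checkpoints`, and returns `(image_list, image_dir)`, where `image_dir` is only set in the directory case. A small usage sketch, assuming the AutoNUE directory is on `sys.path` and `data/demo` is a placeholder folder:

```python
from predict import get_image_list  # the same helper is also defined in predict_ensemble*.py

# Directory input: walks the tree, keeps common image suffixes, returns the root too.
image_list, image_dir = get_image_list('data/demo')
print(len(image_list), image_dir)    # e.g. 42 'data/demo'

# Single-file input: a one-element list, and image_dir stays None.
single, root = get_image_list('data/demo/frame_000.jpg')
print(single, root)                  # ['data/demo/frame_000.jpg'] None
```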
+ +import argparse +import os + +import paddle + +from paddleseg.cvlibs import manager, Config +from paddleseg.utils import get_sys_env, logger +from core.predict_ensemble_three import predictEnsembleThree +import datasets, models + + +def parse_args(): + parser = argparse.ArgumentParser(description='Model prediction') + + # params of prediction + parser.add_argument( + "--config", dest="cfg", help="The config file.", default=None, type=str) + parser.add_argument( + '--model_path', + dest='model_path', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + "--config_1", + dest="cfg_1", + help="The config file.", + default=None, + type=str) + parser.add_argument( + '--model_path_1', + dest='model_path_1', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + "--config_crop", + dest="cfg_crop", + help="The config file.", + default=None, + type=str) + parser.add_argument( + '--model_path_crop', + dest='model_path_crop', + help='The path of model for prediction', + type=str, + default=None) + parser.add_argument( + '--image_path', + dest='image_path', + help= + 'The path of image, it can be a file or a directory including images', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the predicted results', + type=str, + default='./output/result') + + # augment for prediction + parser.add_argument( + '--aug_pred', + dest='aug_pred', + help='Whether to use mulit-scales and flip augment for prediction', + action='store_true') + parser.add_argument( + '--scales', + dest='scales', + nargs='+', + help='Scales for augment', + type=float, + default=1.0) + parser.add_argument( + '--flip_horizontal', + dest='flip_horizontal', + help='Whether to use flip horizontally augment', + action='store_true') + parser.add_argument( + '--flip_vertical', + dest='flip_vertical', + help='Whether to use flip vertically augment', + action='store_true') + + # sliding window prediction + parser.add_argument( + '--is_slide', + dest='is_slide', + help='Whether to prediction by sliding window', + action='store_true') + parser.add_argument( + '--crop_size', + dest='crop_size', + nargs=2, + help= + 'The crop size of sliding window, the first is width and the second is height.', + type=int, + default=None) + parser.add_argument( + '--stride', + dest='stride', + nargs=2, + help= + 'The stride of sliding window, the first is width and the second is height.', + type=int, + default=None) + + return parser.parse_args() + + +def get_image_list(image_path): + """Get image list""" + valid_suffix = [ + '.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png' + ] + image_list = [] + image_dir = None + if os.path.isfile(image_path): + if os.path.splitext(image_path)[-1] in valid_suffix: + image_list.append(image_path) + elif os.path.isdir(image_path): + image_dir = image_path + for root, dirs, files in os.walk(image_path): + for f in files: + if '.ipynb_checkpoints' in root: + continue + if os.path.splitext(f)[-1] in valid_suffix: + image_list.append(os.path.join(root, f)) + else: + raise FileNotFoundError( + '`--image_path` is not found. 
it should be an image file or a directory including images' + ) + + if len(image_list) == 0: + raise RuntimeError('There are not image file in `--image_path`') + + return image_list, image_dir + + +def main(args): + env_info = get_sys_env() + place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[ + 'GPUs used'] else 'cpu' + + paddle.set_device(place) + if not args.cfg: + raise RuntimeError('No configuration file specified.') + + cfg = Config(args.cfg) + val_dataset = cfg.val_dataset + cfg_1 = Config(args.cfg_1) + cfg_crop = Config(args.cfg_crop) + val_dataset_crop = cfg_crop.val_dataset + + if not val_dataset: + raise RuntimeError( + 'The verification dataset is not specified in the configuration file.' + ) + + msg = '\n---------------Config Information---------------\n' + msg += str(cfg) + msg += '------------------------------------------------' + logger.info(msg) + + model = cfg.model + model_1 = cfg_1.model + model_crop = cfg_crop.model + transforms = val_dataset.transforms + transforms_crop = val_dataset_crop.transforms + image_list, image_dir = get_image_list(args.image_path) + logger.info('Number of predict images = {}'.format(len(image_list))) + predictEnsembleThree( + model, + model_1, + model_crop, + model_path=args.model_path, + model_path_1=args.model_path_1, + model_path_crop=args.model_path_crop, + transforms=transforms, + transforms_crop=transforms_crop, + image_list=image_list, + image_dir=image_dir, + save_dir=args.save_dir, + aug_pred=args.aug_pred, + scales=args.scales, + flip_horizontal=args.flip_horizontal, + flip_vertical=args.flip_vertical, + is_slide=args.is_slide, + crop_size=args.crop_size, + stride=args.stride, + ) + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/contrib/AutoNUE/scripts/train.py b/contrib/AutoNUE/scripts/train.py new file mode 100755 index 0000000000..d464592047 --- /dev/null +++ b/contrib/AutoNUE/scripts/train.py @@ -0,0 +1,279 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +from collections import deque +import shutil + +import paddle +import paddle.nn.functional as F + +from paddleseg.utils import TimeAverager, calculate_eta, resume, logger +from core.val import evaluate + +#from core.val_crop import evaluate + + +def check_logits_losses(logits_list, losses): + len_logits = len(logits_list) + len_losses = len(losses['types']) + if len_logits != len_losses: + raise RuntimeError( + 'The length of logits_list should equal to the types of loss config: {} != {}.' + .format(len_logits, len_losses)) + + +def loss_computation(logits_list, labels, losses, edges=None): + check_logits_losses(logits_list, losses) + loss_list = [] + for i in range(len(logits_list)): + logits = logits_list[i] + loss_i = losses['types'][i] + # Whether to use edges as labels According to loss type. 
+ if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label: + loss_list.append(losses['coef'][i] * loss_i(logits, edges)) + else: + loss_list.append(losses['coef'][i] * loss_i(logits, labels)) + return loss_list + + +def train(model, + train_dataset, + val_dataset=None, + aug_eval=False, + flip_horizontal_eval=False, + optimizer=None, + save_dir='output', + iters=10000, + batch_size=2, + resume_model=None, + save_interval=1000, + log_iters=10, + num_workers=0, + use_vdl=False, + losses=None, + keep_checkpoint_max=5): + """ + Launch training. + + Args: + model(nn.Layer): A sementic segmentation model. + train_dataset (paddle.io.Dataset): Used to read and process training datasets. + val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets. + aug_eval (bool, optional): Whether to use mulit-scales and flip augment for evaluation. Default: False. + flip_horizontal_eval (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_eval` is True. Default: True. + optimizer (paddle.optimizer.Optimizer): The optimizer. + save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'. + iters (int, optional): How may iters to train the model. Defualt: 10000. + batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2. + resume_model (str, optional): The path of resume model. + save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000. + log_iters (int, optional): Display logging information at every log_iters. Default: 10. + num_workers (int, optional): Num workers for data loader. Default: 0. + use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False. + losses (dict): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']). + The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient. + keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5. + """ + nranks = paddle.distributed.ParallelEnv().nranks + local_rank = paddle.distributed.ParallelEnv().local_rank + + start_iter = 0 + if resume_model is not None: + start_iter = resume(model, optimizer, resume_model) + + if not os.path.isdir(save_dir): + if os.path.exists(save_dir): + os.remove(save_dir) + os.makedirs(save_dir) + + if nranks > 1: + # Initialize parallel training environment. 
+ paddle.distributed.init_parallel_env() + ddp_model = paddle.DataParallel(model) + + +# for item in ddp_model.named_parameters(): +# if item[0].find('scale_attn')==-1: +# item[1].stop_gradient=True + + batch_sampler = paddle.io.DistributedBatchSampler( + train_dataset, batch_size=batch_size, shuffle=True, drop_last=True) + + loader = paddle.io.DataLoader( + train_dataset, + batch_sampler=batch_sampler, + num_workers=num_workers, + return_list=True, + ) + + if use_vdl: + from visualdl import LogWriter + log_writer = LogWriter(save_dir) + + avg_loss = 0.0 + avg_loss_list = [] + iters_per_epoch = len(batch_sampler) + best_mean_iou = -1.0 + best_model_iter = -1 + reader_cost_averager = TimeAverager() + batch_cost_averager = TimeAverager() + save_models = deque() + batch_start = time.time() + + iter = start_iter + while iter < iters: + + for data in loader: + iter += 1 + if iter > iters: + break + reader_cost_averager.record(time.time() - batch_start) + images = data[0] + labels = data[1].astype('int64') + edges = None + if len(data) == 3: + edges = data[2].astype('int64') + + if hasattr(train_dataset, + 'shuffle') and iter % iters_per_epoch == 0: + train_dataset.shuffle() + + if nranks > 1: + logits_list = ddp_model(images) + else: + logits_list = model(images) + loss_list = loss_computation( + logits_list=logits_list, + labels=labels, + losses=losses, + edges=edges) + loss = sum(loss_list) + loss.backward() + + optimizer.step() + lr = optimizer.get_lr() + if isinstance(optimizer._learning_rate, + paddle.optimizer.lr.LRScheduler): + optimizer._learning_rate.step() + model.clear_gradients() + avg_loss += loss.numpy()[0] + if not avg_loss_list: + avg_loss_list = [l.numpy() for l in loss_list] + else: + for i in range(len(loss_list)): + avg_loss_list[i] += loss_list[i].numpy() + batch_cost_averager.record( + time.time() - batch_start, num_samples=batch_size) + + if (iter) % log_iters == 0 and local_rank == 0: + avg_loss /= log_iters + avg_loss_list = [l[0] / log_iters for l in avg_loss_list] + remain_iters = iters - iter + avg_train_batch_cost = batch_cost_averager.get_average() + avg_train_reader_cost = reader_cost_averager.get_average() + eta = calculate_eta(remain_iters, avg_train_batch_cost) + logger.info( + "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}" + .format((iter - 1) // iters_per_epoch + 1, iter, iters, + avg_loss, lr, avg_train_batch_cost, + avg_train_reader_cost, + batch_cost_averager.get_ips_average(), eta)) + if use_vdl: + log_writer.add_scalar('Train/loss', avg_loss, iter) + # Record all losses if there are more than 2 losses. 
+ if len(avg_loss_list) > 1: + avg_loss_dict = {} + for i, value in enumerate(avg_loss_list): + avg_loss_dict['loss_' + str(i)] = value + for key, value in avg_loss_dict.items(): + log_tag = 'Train/' + key + log_writer.add_scalar(log_tag, value, iter) + + log_writer.add_scalar('Train/lr', lr, iter) + log_writer.add_scalar('Train/batch_cost', + avg_train_batch_cost, iter) + log_writer.add_scalar('Train/reader_cost', + avg_train_reader_cost, iter) + avg_loss = 0.0 + avg_loss_list = [] + reader_cost_averager.reset() + batch_cost_averager.reset() + + if (iter % save_interval == 0 + or iter == iters) and (val_dataset is not None): + num_workers = 1 if num_workers > 0 else 0 + mean_iou, acc = evaluate( + model, + val_dataset, + aug_eval=aug_eval, + scales=1.0, + flip_horizontal=False, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + num_workers=num_workers) + model.train() + + if (iter % save_interval == 0 or iter == iters) and local_rank == 0: + current_save_dir = os.path.join(save_dir, + "iter_{}".format(iter)) + if not os.path.isdir(current_save_dir): + os.makedirs(current_save_dir) + paddle.save(model.state_dict(), + os.path.join(current_save_dir, 'model.pdparams')) + paddle.save(optimizer.state_dict(), + os.path.join(current_save_dir, 'model.pdopt')) + save_models.append(current_save_dir) + if len(save_models) > keep_checkpoint_max > 0: + model_to_remove = save_models.popleft() + shutil.rmtree(model_to_remove) + + if val_dataset is not None: + if mean_iou > best_mean_iou: + best_mean_iou = mean_iou + best_model_iter = iter + best_model_dir = os.path.join(save_dir, "best_model") + paddle.save( + model.state_dict(), + os.path.join(best_model_dir, 'model.pdparams')) + logger.info( + '[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.' + .format(best_mean_iou, best_model_iter)) + + if use_vdl: + log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter) + log_writer.add_scalar('Evaluate/Acc', acc, iter) + batch_start = time.time() + + # Calculate flops. + if local_rank == 0: + + def count_syncbn(m, x, y): + x = x[0] + nelements = x.numel() + m.total_ops += int(2 * nelements) + + _, c, h, w = images.shape + flops = paddle.flops( + model, [1, c, h, w], + custom_ops={paddle.nn.SyncBatchNorm: count_syncbn}) + logger.info(flops) + + # Sleep for half a second to let dataloader release resources. + time.sleep(0.5) + if use_vdl: + log_writer.close() diff --git a/contrib/AutoNUE/tools/IDD_labeling.py b/contrib/AutoNUE/tools/IDD_labeling.py new file mode 100755 index 0000000000..837c6743b6 --- /dev/null +++ b/contrib/AutoNUE/tools/IDD_labeling.py @@ -0,0 +1,130 @@ +import os +import numpy as np +import cv2 +from PIL import Image +from paddleseg import utils +import xml.dom.minidom + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) + + +def get_image_list(image_path): + """Get image list""" + valid_suffix = [ + '.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png' + ] + image_list = [] + image_dir = None + if os.path.isfile(image_path): + if os.path.splitext(image_path)[-1] in valid_suffix: + image_list.append(image_path) + elif os.path.isdir(image_path): + image_dir = image_path + for root, dirs, files in os.walk(image_path): + for f in files: + if '.ipynb_checkpoints' in root: + continue + if os.path.splitext(f)[-1] in valid_suffix: + image_list.append(os.path.join(root.split('/')[-1], f)) + else: + raise FileNotFoundError( + '`--image_path` is not found. 
It should be an image file or a directory including images'
+ )
+
+ if len(image_list) == 0:
+ raise RuntimeError('There is no image file in `--image_path`')
+
+ return image_list, image_dir
+
+
+# Overwrite traffic sign/light regions of the segmentation prediction with the
+# classes implied by the detection bounding boxes (18: traffic sign, 19: traffic light).
+def refine_pred():
+ image_list, image_dir = get_image_list(
+ 'detection_out/pseudo_color_prediction')
+ for ii in image_list:
+ name_pred = 'detection_out/pseudo_color_prediction/' + ii
+ name_label = 'data/IDD_Detection/Annotations/all/' + ii[:-3] + 'xml'
+ pred = np.array(Image.open(name_pred)).astype(np.float32)
+ if not os.path.exists(name_label):
+ pred_mask = utils.visualize.get_pseudo_color_map(pred)
+ pred_saved_path = 'detect_out/pred_refine/' + ii
+ mkdir(pred_saved_path)
+ pred_mask.save(pred_saved_path)
+ continue
+
+ dom = xml.dom.minidom.parse(name_label)
+ root = dom.documentElement
+ objects = root.getElementsByTagName("object")
+ for item in objects:
+ name = item.getElementsByTagName("name")[0]
+ if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
+ print(ii)
+ xmin = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'xmin')[0].firstChild.data)
+ ymin = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'ymin')[0].firstChild.data)
+ xmax = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'xmax')[0].firstChild.data)
+ ymax = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'ymax')[0].firstChild.data)
+ if name.firstChild.data == 'traffic sign':
+ pred[ymin:ymax, xmin:xmax] = 18
+ elif name.firstChild.data == 'traffic light':
+ pred[ymin:ymax, xmin:xmax] = 19
+
+ pred_mask = utils.visualize.get_pseudo_color_map(pred)
+ pred_saved_path = 'detect_out/pred_refine/' + ii
+ mkdir(pred_saved_path)
+ pred_mask.save(pred_saved_path)
+
+
+# Visual sanity check: paint detection boxes into the red/green channels and blend
+# them over the image (the paths below are hard-coded to a local machine).
+def test():
+ path = '/Users/liliulei/Downloads/IDD_Detection/JPEGImages/frontNear/'
+ image_list, image_dir = get_image_list(path)
+
+ for ii in image_list:
+ name_xml = '/Users/liliulei/Downloads/IDD_Detection/Annotations/frontNear/' + ii[:
+ -3] + 'xml'
+ image = cv2.imread(path + ii)
+ # print(image.shape)
+ (h, w) = image.shape[0:2]
+
+ pred = np.zeros_like(image)
+
+ dom = xml.dom.minidom.parse(name_xml)
+ root = dom.documentElement
+ objects = root.getElementsByTagName("object")
+ for item in objects:
+ name = item.getElementsByTagName("name")[0]
+ print(name.firstChild.data)
+ if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
+ xmin = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'xmin')[0].firstChild.data)
+ ymin = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'ymin')[0].firstChild.data)
+ xmax = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'xmax')[0].firstChild.data)
+ ymax = int(
+ item.getElementsByTagName('bndbox')[0].getElementsByTagName(
+ 'ymax')[0].firstChild.data)
+ if name.firstChild.data == 'traffic sign':
+ pred[ymin:ymax, xmin:xmax, 0] = 255
+ elif name.firstChild.data == 'traffic light':
+ pred[ymin:ymax, xmin:xmax, 1] = 255
+
+ new_im = image * 0.5 + pred * 0.5
+
+ cv2.imwrite(ii.split('/')[-1][:-3] + 'png', new_im)
+
+
+refine_pred()
diff --git a/contrib/AutoNUE/train.py b/contrib/AutoNUE/train.py
new file mode 100755
index 0000000000..b1b14d25fb
--- /dev/null
+++ b/contrib/AutoNUE/train.py
@@ -0,0 +1,151 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + +import paddle +from paddleseg.cvlibs import manager, Config +from paddleseg.utils import get_sys_env, logger + +import datasets, models +from scripts.train import train + + +def parse_args(): + parser = argparse.ArgumentParser(description='Model training') + # params of training + parser.add_argument( + "--config", dest="cfg", help="The config file.", default=None, type=str) + parser.add_argument( + '--iters', + dest='iters', + help='iters for training', + type=int, + default=None) + parser.add_argument( + '--batch_size', + dest='batch_size', + help='Mini batch size of one gpu or cpu', + type=int, + default=None) + parser.add_argument( + '--learning_rate', + dest='learning_rate', + help='Learning rate', + type=float, + default=None) + parser.add_argument( + '--save_interval', + dest='save_interval', + help='How many iters to save a model snapshot once during training.', + type=int, + default=1000) + parser.add_argument( + '--resume_model', + dest='resume_model', + help='The path of resume model', + type=str, + default=None) + parser.add_argument( + '--save_dir', + dest='save_dir', + help='The directory for saving the model snapshot', + type=str, + default='./output') + parser.add_argument( + '--keep_checkpoint_max', + dest='keep_checkpoint_max', + help='Maximum number of checkpoints to save', + type=int, + default=5) + parser.add_argument( + '--num_workers', + dest='num_workers', + help='Num workers for data loader', + type=int, + default=0) + parser.add_argument( + '--do_eval', + dest='do_eval', + help='Eval while training', + action='store_true') + parser.add_argument( + '--log_iters', + dest='log_iters', + help='Display logging information at every log_iters', + default=10, + type=int) + parser.add_argument( + '--use_vdl', + dest='use_vdl', + help='Whether to record the data to VisualDL during training', + action='store_true') + + return parser.parse_args() + + +def main(args): + env_info = get_sys_env() + info = ['{}: {}'.format(k, v) for k, v in env_info.items()] + info = '\n'.join(['', format('Environment Information', '-^48s')] + info + + ['-' * 48]) + logger.info(info) + + place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[ + 'GPUs used'] else 'cpu' + + paddle.set_device(place) + if not args.cfg: + raise RuntimeError('No configuration file specified.') + + cfg = Config( + args.cfg, + learning_rate=args.learning_rate, + iters=args.iters, + batch_size=args.batch_size) + + train_dataset = cfg.train_dataset + if train_dataset is None: + raise RuntimeError( + 'The training dataset is not specified in the configuration file.') + val_dataset = cfg.val_dataset if args.do_eval else None + losses = cfg.loss + + msg = '\n---------------Config Information---------------\n' + msg += str(cfg) + msg += '------------------------------------------------' + logger.info(msg) + + train( + cfg.model, + train_dataset, + val_dataset=val_dataset, + aug_eval=True, + flip_horizontal_eval=False, + optimizer=cfg.optimizer, + save_dir=args.save_dir, + iters=cfg.iters, + batch_size=cfg.batch_size, + resume_model=args.resume_model, + 
save_interval=args.save_interval,
+ log_iters=args.log_iters,
+ num_workers=args.num_workers,
+ use_vdl=args.use_vdl,
+ losses=losses,
+ keep_checkpoint_max=args.keep_checkpoint_max)
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ main(args)
diff --git a/contrib/AutoNUE/val.py b/contrib/AutoNUE/val.py
new file mode 100755
index 0000000000..e66920bf1d
--- /dev/null
+++ b/contrib/AutoNUE/val.py
@@ -0,0 +1,137 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+
+import paddle
+
+from paddleseg.cvlibs import manager, Config
+from core.val import evaluate
+from paddleseg.utils import get_sys_env, logger, utils
+import datasets, models
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Model evaluation')
+
+ # params of evaluate
+ parser.add_argument(
+ "--config", dest="cfg", help="The config file.", default=None, type=str)
+ parser.add_argument(
+ '--model_path',
+ dest='model_path',
+ help='The path of model for evaluation',
+ type=str,
+ default=None)
+ parser.add_argument(
+ '--num_workers',
+ dest='num_workers',
+ help='Num workers for data loader',
+ type=int,
+ default=0)
+
+ # augment for evaluation
+ parser.add_argument(
+ '--aug_eval',
+ dest='aug_eval',
+ help='Whether to use multi-scale and flip augmentation for evaluation',
+ action='store_true')
+ parser.add_argument(
+ '--scales',
+ dest='scales',
+ nargs='+',
+ help='Scales for augmentation',
+ type=float,
+ default=1.0)
+ parser.add_argument(
+ '--flip_horizontal',
+ dest='flip_horizontal',
+ help='Whether to use horizontal flip augmentation',
+ action='store_true')
+ parser.add_argument(
+ '--flip_vertical',
+ dest='flip_vertical',
+ help='Whether to use vertical flip augmentation',
+ action='store_true')
+
+ # sliding window evaluation
+ parser.add_argument(
+ '--is_slide',
+ dest='is_slide',
+ help='Whether to evaluate by sliding window',
+ action='store_true')
+ parser.add_argument(
+ '--crop_size',
+ dest='crop_size',
+ nargs=2,
+ help=
+ 'The crop size of sliding window, the first is width and the second is height.',
+ type=int,
+ default=None)
+ parser.add_argument(
+ '--stride',
+ dest='stride',
+ nargs=2,
+ help=
+ 'The stride of sliding window, the first is width and the second is height.',
+ type=int,
+ default=None)
+
+ return parser.parse_args()
+
+
+def main(args):
+ env_info = get_sys_env()
+ place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
+ 'GPUs used'] else 'cpu'
+
+ paddle.set_device(place)
+ if not args.cfg:
+ raise RuntimeError('No configuration file specified.')
+
+ cfg = Config(args.cfg)
+ val_dataset = cfg.val_dataset
+ if val_dataset is None:
+ raise RuntimeError(
+ 'The validation dataset is not specified in the configuration file.'
+ ) + + msg = '\n---------------Config Information---------------\n' + msg += str(cfg) + msg += '------------------------------------------------' + logger.info(msg) + + model = cfg.model + utils.load_entire_model(model, args.model_path) + + logger.info('Loaded trained params of model successfully') + + evaluate( + model, + val_dataset, + aug_eval=args.aug_eval, + scales=args.scales, + flip_horizontal=args.flip_horizontal, + flip_vertical=args.flip_vertical, + is_slide=args.is_slide, + crop_size=args.crop_size, + stride=args.stride, + num_workers=args.num_workers, + ) + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/contrib/CityscapesSOTA/README.md b/contrib/CityscapesSOTA/README.md index f95240d621..0a016b9dbb 100644 --- a/contrib/CityscapesSOTA/README.md +++ b/contrib/CityscapesSOTA/README.md @@ -8,7 +8,7 @@ Based on the above work, we made some optimizations: We achieve mIoU of **87%** on Cityscapes validation set. -![demo](../../docs/images/cityscapes.gif) +![demo](https://user-images.githubusercontent.com/53808988/130719591-3e0d44b4-59a8-4633-bff2-7ce7da1c52fc.gif) ## Installation diff --git a/contrib/EISeg/README.md b/contrib/EISeg/README.md deleted file mode 100644 index 858d2383e7..0000000000 --- a/contrib/EISeg/README.md +++ /dev/null @@ -1,85 +0,0 @@ -[![Python 3.6](https://img.shields.io/badge/python-3.6+-blue.svg)](https://www.python.org/downloads/release/python-360/) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) - - -# EISeg - -EISeg(Efficient Interactive Segmentation)是基于飞桨开发的一个高效智能的交互式分割标注软件. 它使用了RITM(Reviving Iterative Training with Mask Guidance for Interactive Segmentation)算法,涵盖了高精度和轻量级等不同方向的高质量交互式分割模型,方便开发者快速实现语义及实例标签的标注,降低标注成本。 另外,将EISeg获取到的标注应用到PaddleSeg提供的其他分割模型进行训练,便可得到定制化场景的高精度模型,打通分割任务从数据标注到模型训练及预测的全流程。 - - - -## 模型准备 -在使用EIseg前,请先下载模型参数。EISeg开放了在COCO+LVIS和大规模人像数据上训练的四个标注模型,满足通用场景和人像场景的标注需求。其中模型结构对应EISeg交互工具中的网络选择模块,用户需要根据自己的场景需求选择不同的网络结构和加载参数。 - -| 模型类型 | 适用场景 | 模型结构 | 下载地址| -| --- | --- | --- | ---| -| 高精度模型 | 适用于通用场景的图像标注。 |HRNet18_OCR64 | [hrnet18_ocr64_cocolvis](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18_ocr64_cocolvis.pdparams) | -| 轻量化模型 | 适用于通用场景的图像标注。 |HRNet18_OCR48 | [hrnet18s_ocr48_cocolvis](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18s_ocr48_cocolvis.pdparams) | -| 高精度模型 | 适用于人像标注场景。 |HRNet18_OCR64 | [hrnet18_ocr64_human](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18_ocr64_human.pdparams) | -| 轻量化模型 | 适用于人像标注场景。 |HRNet18_OCR48 | [hrnet18s_ocr48_human](https://bj.bcebos.com/paddleseg/dygraph/interactive_segmentation/ritm/hrnet18s_ocr48_human.pdparams) | - - - - -## 安装 - -EISeg提供多种安装方式,其中使用[pip](#PIP),[conda](#Conda)和[运行代码](#运行代码)方式可兼容Windows,Mac OS和Linux。为了避免环境冲突,推荐在conda创建的虚拟环境中安装。 - -版本要求: - -* PaddlePaddle >= 2.1.0 - -PaddlePaddle安装请参考[官网](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/windows-pip.html)。 - -### PIP - -pip安装方式如下: - -```shell -pip install eiseg -``` -pip会自动安装依赖。安装完成后命令行输入: -```shell -eiseg -``` -即可运行软件。 - -### Conda -首先安装Anaconda或Miniconda,过程参考[清华镜像教程](https://mirrors.tuna.tsinghua.edu.cn/help/anaconda/)。 - -```shell -conda create -n eiseg python=3.8 -conda activate eiseg -conda install qtpy -pip install eiseg -eiseg -``` - -### Windows exe - 
-EISeg使用[QPT](https://github.com/GT-ZhangAcer/QPT)进行打包。可以从[百度云盘](https://pan.baidu.com/s/1K7cbNnlCtfEXcuiamdxjWA)(提取码:82z9)下载目前最新的EISeg,也可从[Releases](https://github.com/PaddleCV-SIG/EISeg/releases)中进行下载。解压后双击启动程序.exe即可运行程序。程序第一次运行会初始化安装所需要的包,请稍等片刻。 - -### 运行代码 - -首先clone本项目到本地。 -```shell -git clone https://github.com/PaddlePaddle/PaddleSeg -cd PaddleSeg/contrib/eiseg -pip install -r requirements.txt -python -m eiseg -``` -即可开始执行。 - - - -# 开发者 -[YuYing Hao](https://github.com/haoyuying) - -[YiZhou Chen](https://github.com/geoyee) - -[Lin Han](https://github.com/linhandev/) - -[GT](https://github.com/GT-ZhangAcer) - -[ZhiLiang Yu](https://github.com/yzl19940819) - - diff --git a/contrib/EISeg/eiseg/__init__.py b/contrib/EISeg/eiseg/__init__.py deleted file mode 100644 index 1b83903809..0000000000 --- a/contrib/EISeg/eiseg/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -import sys -import os.path as osp - -pjpath = osp.dirname(osp.realpath(__file__)) -sys.path.append(pjpath) - - -from run import main -from models import models - - -__APPNAME__ = "EISeg" diff --git a/contrib/EISeg/eiseg/__main__.py b/contrib/EISeg/eiseg/__main__.py deleted file mode 100644 index 8889620ba3..0000000000 --- a/contrib/EISeg/eiseg/__main__.py +++ /dev/null @@ -1,4 +0,0 @@ -from run import main - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/contrib/EISeg/eiseg/app.py b/contrib/EISeg/eiseg/app.py deleted file mode 100644 index 58ad908542..0000000000 --- a/contrib/EISeg/eiseg/app.py +++ /dev/null @@ -1,920 +0,0 @@ -import os -import os.path as osp -from functools import partial - -from qtpy import QtGui, QtCore, QtWidgets -from qtpy.QtWidgets import QMainWindow, QMessageBox, QTableWidgetItem -from qtpy.QtGui import QImage, QPixmap -from qtpy.QtCore import Qt -import paddle -import cv2 -import numpy as np -from PIL import Image - -from util.colormap import ColorMask -from controller import InteractiveController -from ui import Ui_EISeg, Ui_Help -from models import models, findModelbyName -import util - - -__APPNAME__ = "EISeg" -here = osp.dirname(osp.abspath(__file__)) - - -class APP_EISeg(QMainWindow, Ui_EISeg): - def __init__(self, parent=None): - super(APP_EISeg, self).__init__(parent) - self.setupUi(self) - # 显示帮助 - self.help_dialog = QtWidgets.QDialog() - help_ui = Ui_Help() - help_ui.setupUi(self.help_dialog) - - # app变量 - self.controller = None - self.outputDir = None # 标签保存路径 - self.labelPaths = [] # 保存所有从outputdir发现的标签文件路径 - self.currIdx = 0 # 标注文件夹时到第几个了 - self.currentPath = None - self.filePaths = [] # 标注文件夹时所有文件路径 - self.modelType = models[0] # 模型类型 - # TODO: labelList用一个class实现 - self.labelList = [] # 标签列表(数字,名字,颜色) - self.config = util.parseConfigs(osp.join(here, "config/config.yaml")) - self.maskColormap = ColorMask(color_path=osp.join(here, "config/colormap.txt")) - # self.labelList = [[1, "人", [0, 0, 0]], [2, "车", [128, 128, 128]]] - self.isDirty = False - self.settings = QtCore.QSettings( - osp.join(here, "config/setting.ini"), QtCore.QSettings.IniFormat - ) - print(self.settings.fileName()) - - self.recentFiles = self.settings.value("recent_files", []) - self.recentParams = self.settings.value("recent_params", []) - # 画布部分 - self.canvas.clickRequest.connect(self.canvasClick) - self.image = None - - self.initActions() - - ## 按钮点击 - self.btnSave.clicked.connect(self.saveLabel) # 保存 - self.listFiles.itemDoubleClicked.connect(self.listClicked) # list选择 - self.comboModelSelect.currentIndexChanged.connect(self.changeModelType) # 模型选择 - 
self.btnAddClass.clicked.connect(self.addLabel) - self.btnParamsSelect.clicked.connect(self.changeModel) # 模型参数选择 - - # 滑动 - self.sldOpacity.valueChanged.connect(self.maskOpacityChanged) - self.sldClickRadius.valueChanged.connect(self.clickRadiusChanged) - self.sldThresh.valueChanged.connect(self.threshChanged) - - # 标签列表点击 - # TODO: 更换标签颜色之后重绘所有已有标签 - self.labelListTable.cellDoubleClicked.connect(self.labelListDoubleClick) - self.labelListTable.cellClicked.connect(self.labelListClicked) - self.labelListTable.cellChanged.connect(self.labelListItemChanged) - - labelListFile = self.settings.value("label_list_file") - print(labelListFile) - self.labelList = util.readLabel(labelListFile) - self.refreshLabelList() - - # TODO: 打开上次关软件时用的模型 - # TODO: 在ui展示后再加载模型 - # 在run中异步加载近期吗,模型参数 - - # 消息栏(放到load_recent_params不会显示) - if len(self.recentParams) == 0: - self.statusbar.showMessage("模型参数未加载") - else: - if osp.exists(self.recentParams[-1]["path"]): - # TODO: 能不能删除注册表中找不到的路径 - self.statusbar.showMessage("正在加载最近模型参数") - else: - self.statusbar.showMessage("最近参数不存在,请重新加载参数") - - def updateFileMenu(self): - def exists(filename): - return osp.exists(str(filename)) - - menu = self.actions.recent_files - menu.clear() - print("recentFiles", self.recentFiles) - files = [f for f in self.recentFiles if f != self.currentPath and exists(f)] - for i, f in enumerate(files): - if osp.exists(f): - icon = util.newIcon("File") - action = QtWidgets.QAction( - icon, "&%d %s" % (i + 1, QtCore.QFileInfo(f).fileName()), self - ) - action.triggered.connect(partial(self.loadImage, f, True)) - menu.addAction(action) - - def updateParamsMenu(self): - def exists(filename): - return osp.exists(str(filename)) - - menu = self.actions.recent_params - menu.clear() - print("recentParams", self.recentParams) - files = [f for f in self.recentParams if exists(f["path"])] - for i, f in enumerate(files): - if osp.exists(f["path"]): - icon = util.newIcon("Model") - action = QtWidgets.QAction( - icon, - "&%d %s" % (i + 1, QtCore.QFileInfo(f["path"]).fileName()), - self, - ) - action.triggered.connect( - partial(self.load_model_params, f["path"], f["type"]) - ) - menu.addAction(action) - - def toBeImplemented(self): - self.statusbar.showMessage("功能尚在开发") - - def initActions(self): - def menu(title, actions=None): - menu = self.menuBar().addMenu(title) - if actions: - util.addActions(menu, actions) - return menu - - action = partial(util.newAction, self) - shortcuts = self.config["shortcut"] - turn_prev = action( - self.tr("&上一张"), - partial(self.turnImg, -1), - shortcuts["turn_prev"], - "Prev", - self.tr("翻到上一张图片"), - ) - turn_next = action( - self.tr("&下一张"), - partial(self.turnImg, 1), - shortcuts["turn_next"], - "Next", - self.tr("翻到下一张图片"), - ) - open_image = action( - self.tr("&打开图像"), - self.openImage, - shortcuts["open_image"], - "OpenImage", - self.tr("打开一张图像进行标注"), - ) - open_folder = action( - self.tr("&打开文件夹"), - self.openFolder, - shortcuts["open_folder"], - "OpenFolder", - self.tr("打开一个文件夹下所有的图像进行标注"), - ) - open_recent = action( - self.tr("&最近标注"), - self.toBeImplemented, - "", - # TODO: 搞个图 - "", - self.tr("打开一个文件夹下所有的图像进行标注"), - ) - # model_loader = action( - # self.tr("&选择模型参数"), - # self.loadModel, - # shortcuts["load_model"], - # "Model", - # self.tr("加载一个模型参数"), - # ) - change_output_dir = action( - self.tr("&改变标签保存路径"), - self.changeOutputDir, - shortcuts["change_output_dir"], - "ChangeLabelPath", - self.tr("打开一个文件夹下所有的图像进行标注"), - ) - quick_start = action( - self.tr("&快速上手"), - self.toBeImplemented, - None, - "Use", - 
self.tr("快速上手介绍"), - ) - about = action( - self.tr("&关于软件"), - self.toBeImplemented, - None, - "About", - self.tr("关于这个软件和开发团队"), - ) - grid_ann = action( - self.tr("&N²宫格标注"), - self.toBeImplemented, - None, - "N2", - self.tr("使用N²宫格进行细粒度标注"), - ) - finish_object = action( - self.tr("&完成当前目标"), - self.finishObject, - shortcuts["finish_object"], - "Ok", - self.tr("完成当前目标的标注"), - ) - clear = action( - self.tr("&清除所有标注"), - self.undoAll, - shortcuts["clear"], - "Clear", - self.tr("清除所有标注信息"), - ) - undo = action( - self.tr("&撤销"), - self.undoClick, - shortcuts["undo"], - "Undo", - self.tr("撤销一次点击"), - ) - redo = action( - self.tr("&重做"), - self.toBeImplemented, - shortcuts["redo"], - "Redo", - self.tr("重做一次点击"), - ) - save = action( - self.tr("&保存"), - self.saveLabel, - "", - "Save", - self.tr("保存图像标签"), - ) - save_as = action( - self.tr("&另存为"), - partial(self.saveLabel, True), - "", - "OtherSave", - self.tr("指定标签保存路径"), - ) - auto_save = action( - self.tr("&自动保存"), - self.toggleAutoSave, - "", - "AutoSave", - self.tr("翻页同时自动保存"), - checkable=True, - ) - # auto_save.setChecked(self.config.get("auto_save", False)) - - recent = action( - self.tr("&近期图片"), - self.toBeImplemented, - "", - "RecentDocuments", - self.tr("近期打开的图片"), - ) - close = action( - self.tr("&关闭"), - self.toBeImplemented, - "", - "End", - self.tr("关闭当前图像"), - ) - connected = action( - self.tr("&连通块"), - self.toBeImplemented, - "", - # TODO: 搞个图 - "", - self.tr(""), - ) - quit = action( - self.tr("&退出"), - self.close, - "", - "Close", - self.tr("退出软件"), - ) - save_label = action( - self.tr("&保存标签列表"), - self.saveLabelList, - "", - "ExportLabel", - self.tr("将标签保存成标签配置文件"), - ) - load_label = action( - self.tr("&加载标签列表"), - self.loadLabelList, - "", - "ImportLabel", - self.tr("从标签配置文件中加载标签"), - ) - clear_label = action( - self.tr("&清空标签列表"), - self.clearLabelList, - "", - "ClearLabel", - self.tr("清空所有的标签"), - ) - shortcuts = action( - self.tr("&快捷键列表"), - self.toBeImplemented, - "", - "Shortcut", - self.tr("查看所有快捷键"), - ) - recent_files = QtWidgets.QMenu(self.tr("近期文件")) - recent_files.aboutToShow.connect(self.updateFileMenu) - recent_params = QtWidgets.QMenu(self.tr("近期模型参数")) - recent_params.aboutToShow.connect(self.updateParamsMenu) - # TODO: 改用manager - self.actions = util.struct( - auto_save=auto_save, - recent_files=recent_files, - recent_params=recent_params, - fileMenu=( - open_image, - open_folder, - change_output_dir, - # model_loader, - recent_files, - recent_params, - None, - save, - save_as, - auto_save, - turn_next, - turn_prev, - close, - None, - quit, - ), - labelMenu=(save_label, load_label, clear_label, None, grid_ann), - helpMenu=(quick_start, about, shortcuts), - toolBar=(finish_object, clear, undo, redo, turn_prev, turn_next), - ) - menu("文件", self.actions.fileMenu) - menu("标注", self.actions.labelMenu) - menu("帮助", self.actions.helpMenu) - util.addActions(self.toolBar, self.actions.toolBar) - - def queueEvent(self, function): - # TODO: 研究这个东西是不是真的不影响ui - QtCore.QTimer.singleShot(0, function) - - def showShortcuts(self): - self.toBeImplemented() - - def toggleAutoSave(self, save): - if save and not self.outputDir: - self.changeOutputDir() - if save and not self.outputDir: - save = False - self.actions.auto_save.setChecked(save) - self.config["auto_save"] = save - util.saveConfigs(osp.join(here, "config/config.yaml"), self.config) - - def changeModelType(self, idx): - self.modelType = models[idx] - print("model type:", self.modelType) - - def changeModel(self): - # TODO: 设置gpu还是cpu运行 - formats = 
["*.pdparams"] - filters = self.tr("paddle model params files (%s)") % " ".join(formats) - params_path, _ = QtWidgets.QFileDialog.getOpenFileName( - self, - self.tr("%s - 选择模型参数") % __APPNAME__, - "/home/lin/Desktop", - filters, - ) - print(params_path) - if osp.exists(params_path): - self.load_model_params(params_path) - # 最近参数 - model_dict = {"path": params_path, "type": self.modelType.name} - if model_dict not in self.recentParams: - self.recentParams.append(model_dict) - if len(self.recentParams) > 10: - del self.recentParams[0] - self.settings.setValue("recent_params", self.recentParams) - - def load_model_params(self, params_path, model_type=None): - if model_type is not None: - self.modelType, idx = findModelbyName(model_type) - self.comboModelSelect.setCurrentIndex(idx) - self.statusbar.showMessage(f"正在加载 {self.modelType.name} 模型") - model = self.modelType.load_params(params_path=params_path) - if self.controller is None: - limit_longest_size = 400 - self.controller = InteractiveController( - model, - predictor_params={ - # 'brs_mode': 'f-BRS-B', - "brs_mode": "NoBRS", - "prob_thresh": 0.5, - "zoom_in_params": { - "skip_clicks": -1, - "target_size": (400, 400), - "expansion_ratio": 1.4, - }, - "predictor_params": {"net_clicks_limit": None, "max_size": 800}, - "brs_opt_func_params": {"min_iou_diff": 0.001}, - "lbfgs_params": {"maxfun": 20}, - }, - update_image_callback=self._update_image, - ) - self.controller.prob_thresh = self.segThresh - # 这里如果直接加载模型会报错,先判断有没有图像 - if self.image is not None: - self.controller.set_image(self.image) - else: - self.controller.reset_predictor(model) - self.statusbar.showMessage(f"{osp.basename(params_path)} 模型加载完成", 5000) - - def load_recent_params(self): - # TODO: 感觉整个模型加载需要判断一下网络是否匹配吗? - if len(self.recentParams) != 0: - if osp.exists(self.recentParams[-1]["path"]): - self.modelType, idx = findModelbyName(self.recentParams[-1]["type"]) - self.comboModelSelect.setCurrentIndex(idx) - self.load_model_params(self.recentParams[-1]["path"]) - - # def changeModel(self, idx): - # # TODO: 设置gpu还是cpu运行 - # self.statusbar.showMessage(f"正在加载 {models[idx].name} 模型") - # model = models[idx].get_model() - # if self.controller is None: - # self.controller = InteractiveController( - # model, - # predictor_params={"brs_mode": "f-BRS-B"}, - # update_image_callback=self._update_image, - # ) - # self.controller.prob_thresh = self.segThresh - # # 这里如果直接加载模型会报错,先判断有没有图像 - # if self.image is not None: - # self.controller.set_image(self.image) - # else: - # self.controller.reset_predictor(model) - - # self.statusbar.showMessage(f"{ models[idx].name}模型加载完成", 5000) - - def loadLabelList(self): - filters = self.tr("标签配置文件 (*.txt)") - file_path, _ = QtWidgets.QFileDialog.getOpenFileName( - self, - self.tr("%s - 选择标签配置文件路径") % __APPNAME__, - ".", - filters, - ) - if file_path == "": # 不加判断打开保存界面然后关闭会报错,主要是刷新列表 - return - self.labelList = util.readLabel(file_path) - print(self.labelList) - self.refreshLabelList() - self.settings.setValue("label_list_file", file_path) - - def saveLabelList(self): - if len(self.labelList) == 0: - msg = QMessageBox() - msg.setIcon(QMessageBox.Warning) - msg.setWindowTitle("没有需要保存的标签") - msg.setText("请先添加标签之后再进行保存") - msg.setStandardButtons(QMessageBox.Yes) - res = msg.exec_() - return - filters = self.tr("标签配置文件 (*.txt)") - dlg = QtWidgets.QFileDialog(self, "保存标签配置文件", ".", filters) - dlg.setDefaultSuffix("txt") - dlg.setAcceptMode(QtWidgets.QFileDialog.AcceptSave) - dlg.setOption(QtWidgets.QFileDialog.DontConfirmOverwrite, False) - 
dlg.setOption(QtWidgets.QFileDialog.DontUseNativeDialog, False) - savePath, _ = dlg.getSaveFileName( - self, - self.tr("保存标签配置文件"), - ".", - ) - print(savePath) - self.settings.setValue("label_list_file", savePath) - print("calling save label") - util.saveLabel(self.labelList, savePath) - - def addLabel(self): - # c = [255, 0, 0] - # 可以在配色表中预制多种容易分辨的颜色,直接随机生成恐怕生成类似的颜色不好区分 - c = self.maskColormap.get_color() # 从配色表取颜色 - table = self.labelListTable - table.insertRow(table.rowCount()) - idx = table.rowCount() - 1 - self.labelList.append([idx + 1, "", c]) - print("append", self.labelList) - numberItem = QTableWidgetItem(str(idx + 1)) - numberItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 0, numberItem) - - table.setItem(idx, 1, QTableWidgetItem()) - - colorItem = QTableWidgetItem() - colorItem.setBackground(QtGui.QColor(c[0], c[1], c[2])) - colorItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 2, colorItem) - - delItem = QTableWidgetItem() - delItem.setIcon(util.newIcon("Clear")) - delItem.setTextAlignment(Qt.AlignCenter) - delItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 3, delItem) - - def clearLabelList(self): - self.labelList = [] - if self.controller: - self.controller.label_list = [] - self.controller.curr_label_number = None - self.labelListTable.clear() - self.labelListTable.setRowCount(0) - - def refreshLabelList(self): - print(self.labelList) - table = self.labelListTable - table.clearContents() - table.setRowCount(len(self.labelList)) - table.setColumnCount(4) - for idx, lab in enumerate(self.labelList): - numberItem = QTableWidgetItem(str(lab[0])) - numberItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 0, numberItem) - table.setItem(idx, 1, QTableWidgetItem(lab[1])) - c = lab[2] - colorItem = QTableWidgetItem() - colorItem.setBackground(QtGui.QColor(c[0], c[1], c[2])) - colorItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 2, colorItem) - delItem = QTableWidgetItem() - delItem.setIcon(util.newIcon("clear")) - delItem.setTextAlignment(Qt.AlignCenter) - delItem.setFlags(QtCore.Qt.ItemIsEnabled) - table.setItem(idx, 3, delItem) - - cols = [0, 1, 3] - for idx in cols: - table.resizeColumnToContents(idx) - - def labelListDoubleClick(self, row, col): - print("cell double clicked", row, col) - if col != 2: - return - table = self.labelListTable - color = QtWidgets.QColorDialog.getColor() - # BUG: 判断颜色没变 - print(color.getRgb()) - table.item(row, col).setBackground(color) - self.labelList[row][2] = color.getRgb()[:3] - if self.controller: - self.controller.label_list = self.labelList - - def labelListClicked(self, row, col): - print("cell clicked", row, col) - table = self.labelListTable - if col == 3: - table.removeRow(row) - del self.labelList[row] - if col == 0 or col == 1: - for idx in range(len(self.labelList)): - table.item(idx, 0).setBackground(QtGui.QColor(255, 255, 255)) - table.item(row, 0).setBackground(QtGui.QColor(48, 140, 198)) - for idx in range(3): - table.item(row, idx).setSelected(True) - if self.controller: - print(int(table.item(row, 0).text())) - self.controller.change_label_num(int(table.item(row, 0).text())) - self.controller.label_list = self.labelList - - def labelListItemChanged(self, row, col): - print("cell changed", row, col) - if col != 1: - return - name = self.labelListTable.item(row, col).text() - self.labelList[row][1] = name - - def openImage(self): - formats = [ - "*.{}".format(fmt.data().decode()) - for fmt in QtGui.QImageReader.supportedImageFormats() - ] - filters = self.tr("Image & Label 
files (%s)") % " ".join(formats) - file_path, _ = QtWidgets.QFileDialog.getOpenFileName( - self, - self.tr("%s - 选择待标注图片") % __APPNAME__, - "/home/lin/Desktop", - filters, - ) - if len(file_path) == 0: - return - self.queueEvent(partial(self.loadImage, file_path)) - self.listFiles.addItems([file_path]) - self.filePaths.append(file_path) - # self.imagePath = file_path - - def loadLabel(self, imgPath): - if imgPath == "" or len(self.labelPaths) == 0: - return None - - def getName(path): - return osp.basename(path).split(".")[0] - - imgName = getName(imgPath) - for path in self.labelPaths: - if getName(path) == imgName: - labPath = path - print(labPath) - break - label = cv2.imread(path, cv2.IMREAD_UNCHANGED) - print("label shape", label.shape) - return label - - def loadImage(self, path, update_list=False): - if len(path) == 0 or not osp.exists(path): - return - # TODO: 在不同平台测试含中文路径 - image = cv2.imdecode(np.fromfile(path, dtype=np.uint8), 1) - image = image[:, :, ::-1] # BGR转RGB - self.image = image - self.currentPath = path - if self.controller: - self.controller.set_image(self.image) - else: - self.showWarning("未加载模型参数,请先加载模型参数!") - self.changeModel() - print("please load model params first!") - return 0 - self.controller.set_label(self.loadLabel(path)) - if path not in self.recentFiles: - self.recentFiles.append(path) - if len(self.recentFiles) > 10: - del self.recentFiles[0] - self.settings.setValue("recent_files", self.recentFiles) - self.imagePath = path # 修复使用近期文件的图像保存label报错 - if update_list: - self.listFiles.addItems([path]) - self.filePaths.append(path) - - def openFolder(self): - self.inputDir = QtWidgets.QFileDialog.getExistingDirectory( - self, - self.tr("%s - 选择待标注图片文件夹") % __APPNAME__, - "/home/lin/Desktop", - QtWidgets.QFileDialog.ShowDirsOnly - | QtWidgets.QFileDialog.DontResolveSymlinks, - ) - if len(self.inputDir) == 0: - return - filePaths = os.listdir(self.inputDir) - exts = QtGui.QImageReader.supportedImageFormats() - filePaths = [n for n in filePaths if n.split(".")[-1] in exts] - filePaths = [osp.join(self.inputDir, n) for n in filePaths] - self.filePaths += filePaths - self.listFiles.addItems(filePaths) - self.currIdx = 0 - self.turnImg(0) - - def listClicked(self): - if self.controller.is_incomplete_mask: - self.saveLabel() - toRow = self.listFiles.currentRow() - delta = toRow - self.currIdx - self.turnImg(delta) - - def turnImg(self, delta): - self.currIdx += delta - if self.currIdx >= len(self.filePaths) or self.currIdx < 0: - self.currIdx -= delta - self.statusbar.showMessage(f"没有{'后一张'if delta==1 else '前一张'}图片") - return - self.completeLastMask() - if self.isDirty: - if self.actions.auto_save.isChecked(): - self.saveLabel() - else: - msg = QMessageBox() - msg.setIcon(QMessageBox.Warning) - msg.setWindowTitle("保存标签?") - msg.setText("标签尚未保存,是否保存标签") - msg.setStandardButtons(QMessageBox.Yes | QMessageBox.Cancel) - res = msg.exec_() - if res == QMessageBox.Yes: - self.saveLabel() - - imagePath = self.filePaths[self.currIdx] - self.loadImage(imagePath) - self.imagePath = imagePath - self.listFiles.setCurrentRow(self.currIdx) - self.setClean() - - def finishObject(self): - if self.image is None: - return - if not self.controller: - return - self.controller.finish_object() - self.setDirty() - - def completeLastMask(self): - # 返回最后一个标签是否完成,false就是还有带点的 - if not self.controller: - return True - if not self.controller.is_incomplete_mask: - return True - msg = QMessageBox() - msg.setIcon(QMessageBox.Warning) - msg.setWindowTitle("完成最后一个目标?") - 
msg.setText("是否完成最后一个目标的标注,不完成不会进行保存。") - msg.setStandardButtons(QMessageBox.Yes | QMessageBox.Cancel) - res = msg.exec_() - if res == QMessageBox.Yes: - self.finishObject() - self.setDirty() - return True - return False - - def saveLabel(self, saveAs=False, savePath=None): - if not self.controller: - print("on controller") - return - if self.controller.image is None: - print("no image") - return - self.completeLastMask() - if not savePath: # 参数没传存到哪 - if not saveAs and self.outputDir is not None: - # 指定了标签文件夹,而且不是另存为 - savePath = osp.join( - self.outputDir, osp.basename(self.imagePath).split(".")[0] + ".png" - ) - else: - filters = self.tr("Label files (*.png)") - dlg = QtWidgets.QFileDialog( - self, "保存标签文件路径", osp.dirname(self.imagePath), filters - ) - dlg.setDefaultSuffix("png") - dlg.setAcceptMode(QtWidgets.QFileDialog.AcceptSave) - dlg.setOption(QtWidgets.QFileDialog.DontConfirmOverwrite, False) - dlg.setOption(QtWidgets.QFileDialog.DontUseNativeDialog, False) - savePath, _ = dlg.getSaveFileName( - self, - self.tr("选择标签文件保存路径"), - osp.basename(self.imagePath).split(".")[0] + ".png", - ) - print("++", savePath) - if ( - savePath is None - or len(savePath) == 0 - or not osp.exists(osp.dirname(savePath)) - ): - return - - cv2.imwrite(savePath, self.controller.result_mask) - # 保存路径带有中文 - # cv2.imencode('.png', self.controller.result_mask)[1].tofile(savePath) - # 保存带有调色板的 - # mask_pil = Image.fromarray(self.controller.result_mask, "P") - # mask_map = [0, 0, 0] - # for lb in self.labelList: - # mask_map += lb[2] - # mask_pil.putpalette(mask_map) - # mask_pil.save(savePath) - # self.setClean() - self.statusbar.showMessage(f"标签成功保存至 {savePath}") - - def setClean(self): - self.isDirty = False - - def setDirty(self): - self.isDirty = True - - def changeOutputDir(self): - outputDir = QtWidgets.QFileDialog.getExistingDirectory( - self, - self.tr("%s - 选择标签保存路径") % __APPNAME__, - # osp.dirname(self.imagePath), - ".", - QtWidgets.QFileDialog.ShowDirsOnly - | QtWidgets.QFileDialog.DontResolveSymlinks, - ) - if len(outputDir) == 0 or not osp.exists(outputDir): - return False - labelPaths = os.listdir(outputDir) - exts = ["png"] - labelPaths = [n for n in labelPaths if n.split(".")[-1] in exts] - labelPaths = [osp.join(outputDir, n) for n in labelPaths] - self.outputDir = outputDir - self.labelPaths = labelPaths - return True - - def maskOpacityChanged(self): - self.sldOpacity.textLab.setText(str(self.opacity)) - if not self.controller or self.controller.image is None: - return - self._update_image() - - def clickRadiusChanged(self): - self.sldClickRadius.textLab.setText(str(self.clickRadius)) - if not self.controller or self.controller.image is None: - return - - self._update_image() - - def threshChanged(self): - self.sldThresh.textLab.setText(str(self.segThresh)) - if not self.controller or self.controller.image is None: - return - self.controller.prob_thresh = self.segThresh - self._update_image() - - def undoClick(self): - if self.image is None: - return - if not self.controller: - return - self.controller.undo_click() - if not self.controller.is_incomplete_mask: - self.setClean() - - def undoAll(self): - if not self.controller or self.controller.image is None: - return - self.controller.reset_last_object() - self.setClean() - - def redoClick(self): - self.toBeImplemented() - - def canvasClick(self, x, y, isLeft): - if self.controller is None: - return - if self.controller.image is None: - return - currLabel = self.controller.curr_label_number - if not currLabel or currLabel == 0: - msg = 
QMessageBox() - msg.setIcon(QMessageBox.Warning) - msg.setWindowTitle("未选择当前标签") - msg.setText("请先在标签列表中单击点选标签") - msg.setStandardButtons(QMessageBox.Yes) - res = msg.exec_() - return - - self.controller.add_click(x, y, isLeft) - - def _update_image(self, reset_canvas=False): - if not self.controller: - return - image = self.controller.get_visualization( - alpha_blend=self.opacity, - click_radius=self.clickRadius, - ) - height, width, channel = image.shape - bytesPerLine = 3 * width - image = QImage(image.data, width, height, bytesPerLine, QImage.Format_RGB888) - if reset_canvas: - self.resetZoom(width, height) - self.scene.addPixmap(QPixmap(image)) - # TODO: 研究是否有类似swap的更高效方式 - self.scene.removeItem(self.scene.items()[1]) - - # 界面缩放重置 - def resetZoom(self, width, height): - # 每次加载图像前设定下当前的显示框,解决图像缩小后不在中心的问题 - self.scene.setSceneRect(0, 0, width, height) - # 缩放清除 - self.canvas.scale(1 / self.canvas.zoom_all, 1 / self.canvas.zoom_all) # 重置缩放 - self.canvas.zoom_all = 1 - # 最佳缩放 - s_eps = 5e-2 - scr_cont = [ - self.scrollArea.width() / width - s_eps, - self.scrollArea.height() / height - s_eps, - ] - if scr_cont[0] * height > self.scrollArea.height(): - self.canvas.zoom_all = scr_cont[1] - else: - self.canvas.zoom_all = scr_cont[0] - self.canvas.scale(self.canvas.zoom_all, self.canvas.zoom_all) - - @property - def opacity(self): - return self.sldOpacity.value() / 10 - - @property - def clickRadius(self): - return self.sldClickRadius.value() - - @property - def segThresh(self): - return self.sldThresh.value() / 10 - - # 警告框 - def showWarning(self, str): - msg_box = QMessageBox(QMessageBox.Warning, "警告", str) - msg_box.exec_() diff --git a/contrib/EISeg/eiseg/config/colormap.txt b/contrib/EISeg/eiseg/config/colormap.txt deleted file mode 100644 index 659139612c..0000000000 --- a/contrib/EISeg/eiseg/config/colormap.txt +++ /dev/null @@ -1,12 +0,0 @@ -227,87,101 -241,111,82 -250,206,80 -165,212,100 -98,206,173 -101,192,233 -105,156,237 -171,147,238 -228,136,192 -246,247,251 -205,208,217 -102,109,120 diff --git a/contrib/EISeg/eiseg/config/config.yaml b/contrib/EISeg/eiseg/config/config.yaml deleted file mode 100644 index 613c4f8d6d..0000000000 --- a/contrib/EISeg/eiseg/config/config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -auto_save: true -shortcut: - change_output_dir: Shift+Z - clear: Ctrl+Shift+Z - finish_object: Space - open_folder: Shift+A - open_image: Ctrl+A - redo: Ctrl+Y - turn_next: F - turn_prev: S - undo: Ctrl+Z diff --git a/contrib/EISeg/eiseg/controller.py b/contrib/EISeg/eiseg/controller.py deleted file mode 100644 index cf546b4fb6..0000000000 --- a/contrib/EISeg/eiseg/controller.py +++ /dev/null @@ -1,309 +0,0 @@ -import time - -import paddle -# from tkinter import messagebox - -import numpy as np -import paddleseg.transforms as T - -from inference import clicker -from inference.predictor import get_predictor -from util.vis import draw_with_blend_and_clicks - - -class InteractiveController: - def __init__(self, net, predictor_params, update_image_callback, prob_thresh=0.5): - self.net = net - self.prob_thresh = prob_thresh - self.clicker = clicker.Clicker() - self.states = [] - self.probs_history = [] - self.curr_label_number = 0 - self._result_mask = None - self.label_list = None # 存标签编号和颜色的对照 - self._init_mask = None - - self.image = None - self.predictor = None - self.update_image_callback = update_image_callback - self.predictor_params = predictor_params - self.reset_predictor() - - def set_image(self, image): - """设置当前标注的图片 - - Parameters - ---------- - image : - Description of 
parameter `image`. - """ - # TODO: 这里normalize需要按照模型改 - # input_transform = T.Compose( - # [T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])], - # to_rgb=False, - # ) - self.image = image - # self.image_nd = input_transform(image)[0] - self._result_mask = np.zeros(image.shape[:2], dtype=np.uint8) - # self.curr_label_number = 0 - self.reset_last_object(update_image=False) - self.update_image_callback(reset_canvas=True) - - def set_mask(self, mask): - # if self.image.shape[:2] != mask.shape[:2]: - # messagebox.showwarning( - # "Warning", - # "A segmentation mask must have the same sizes as the current image!", - # ) - # return - - if len(self.probs_history) > 0: - self.reset_last_object() - - self._init_mask = mask.astype(np.float32) - self.probs_history.append((np.zeros_like(self._init_mask), self._init_mask)) - self._init_mask = paddle.to_tensor(self._init_mask).unsqueeze(0).unsqueeze(0) - self.clicker.click_indx_offset = 1 - - def add_click(self, x, y, is_positive): - """添加一个点 - 跑推理,保存历史用于undo - Parameters - ---------- - x : type - Description of parameter `x`. - y : type - Description of parameter `y`. - is_positive : bool - 是否是正点 - Returns - ------- - bool - 点击是否成功添加 - """ - s = self.image.shape - if x < 0 or y < 0 or x >= s[1] or y >= s[0]: - print("点击越界") - return False - self.states.append( - { - "clicker": self.clicker.get_state(), - "predictor": self.predictor.get_states(), - } - ) - - click = clicker.Click(is_positive=is_positive, coords=(y, x)) - self.clicker.add_click(click) - start = time.time() - print(self.predictor) - pred = self.predictor.get_prediction(self.clicker, prev_mask=self._init_mask) - if self._init_mask is not None and len(self.clicker) == 1: - pred = self.predictor.get_prediction( - self.clicker, prev_mask=self._init_mask - ) - end = time.time() - print("cost time", end - start) - - if self.probs_history: - self.probs_history.append((self.probs_history[-1][0], pred)) - else: - self.probs_history.append((np.zeros_like(pred), pred)) - - self.update_image_callback() - - def set_label(self, label): - # if label is None: - # return - # self.probs_history.append((np.zeros_like(label), label)) - # print("len", len(self.probs_history)) - # self.update_image_callback() - pass - - def undo_click(self): - """undo一步点击""" - if not self.states: # 如果还没点 - return - - prev_state = self.states.pop() - self.clicker.set_state(prev_state["clicker"]) - self.predictor.set_states(prev_state["predictor"]) - self.probs_history.pop() - if not self.probs_history: - self.reset_init_mask() - self.update_image_callback() - - def partially_finish_object(self): - """部分完成 - 保存一个mask的状态,这个状态里不存点,看起来比较 - """ - - object_prob = self.current_object_prob - if object_prob is None: - return - - self.probs_history.append((object_prob, np.zeros_like(object_prob))) - self.states.append(self.states[-1]) - - self.clicker.reset_clicks() - self.reset_predictor() - self.reset_init_mask() - self.update_image_callback() - - def finish_object(self): - """结束当前物体标注,准备标下一个""" - object_prob = self.current_object_prob - if object_prob is None: - return - - # self.curr_label_number += 1 # TODO: 当前是按照第几个目标给结果中的数,改成根据目标编号 - object_mask = object_prob > self.prob_thresh - print('curr_label_number:', self.curr_label_number) - self._result_mask[object_mask] = self.curr_label_number - self.reset_last_object() - - def change_label_num(self, number): - """修改当前标签的编号 - 如果当前有标注到一半的目标,改mask。 - 如果没有,下一个目标是这个数 - Parameters - ---------- - number : int - 换成目标的编号 - """ - assert isinstance(number, int), "标签编号应为整数" - 
self.curr_label_number = number - if self.is_incomplete_mask: - pass - # TODO: 改当前mask的编号 - - def reset_last_object(self, update_image=True): - """重置控制器状态 - Parameters - ---------- - update_image : bool - Description of parameter `update_image`. - Returns - ------- - type - Description of returned object. - - """ - self.states = [] - self.probs_history = [] - self.clicker.reset_clicks() - self.reset_predictor() - self.reset_init_mask() - if update_image: - self.update_image_callback() - - def reset_predictor(self, net=None, predictor_params=None): - """重置推理器,可以换权重 - Parameters - ---------- - predictor_params : 网络权重 - 新的网络权重 - """ - # print("palette", self.palette) - if net is not None: - self.net = net - if predictor_params is not None: - self.predictor_params = predictor_params - self.predictor = get_predictor(self.net, **self.predictor_params) - if self.image is not None: - self.predictor.set_input_image(self.image) - - def reset_init_mask(self): - self._init_mask = None - self.clicker.click_indx_offset = 0 - - @property - def current_object_prob(self): - if self.probs_history: - current_prob_total, current_prob_additive = self.probs_history[-1] - return np.maximum(current_prob_total, current_prob_additive) - else: - return None - - @property - def is_incomplete_mask(self): - return len(self.probs_history) > 0 - - @property - def result_mask(self): - result_mask = self._result_mask.copy() - if self.probs_history: - result_mask[self.current_object_prob > self.prob_thresh] = ( - self.object_count + 1 - ) - return result_mask - - def get_visualization(self, alpha_blend, click_radius): - if self.image is None: - return None - - # 1. 画当前没标完的mask - results_mask_for_vis = self.result_mask - if self.probs_history: - results_mask_for_vis[ - self.current_object_prob > self.prob_thresh - ] = self.curr_label_number - - vis = draw_with_blend_and_clicks( - self.image, - mask=results_mask_for_vis, - alpha=alpha_blend, - clicks_list=self.clicker.clicks_list, - radius=click_radius, - palette=self.palette, - ) - - # 2. 
在图片和当前mask的基础上画之前标完的mask - if self.probs_history: - total_mask = self.probs_history[-1][0] > self.prob_thresh - results_mask_for_vis[np.logical_not(total_mask)] = 0 - vis = draw_with_blend_and_clicks( - vis, - mask=results_mask_for_vis, - alpha=alpha_blend, - palette=self.palette, - ) - - return vis - - @property - def palette(self): - if self.label_list: - colors = [l[2] for l in self.label_list] - colors.insert(0, [0, 0, 0]) - else: - colors = [[0, 0, 0]] - print(colors) - return colors - - @property - def current_object_prob(self): - """获取当前推理标签""" - if self.probs_history: - current_prob_total, current_prob_additive = self.probs_history[-1] - return np.maximum(current_prob_total, current_prob_additive) - else: - return None - - @property - def is_incomplete_mask(self): - """ - Returns - ------- - bool - 当前的物体是不是还没标完 - """ - return len(self.probs_history) > 0 - - @property - def result_mask(self): - return self._result_mask.copy() - - @property - def img_size(self): - print(self.image.shape) - return self.image.shape[1::-1] diff --git a/contrib/EISeg/eiseg/controller.py.new b/contrib/EISeg/eiseg/controller.py.new deleted file mode 100644 index 46419bde59..0000000000 --- a/contrib/EISeg/eiseg/controller.py.new +++ /dev/null @@ -1,173 +0,0 @@ -import time -import numpy as np -import paddle -from tkinter import messagebox -from inference import clicker - -from inference.predictor import get_predictor -from util.vis import draw_with_blend_and_clicks -import paddleseg.transforms as T - - -class InteractiveController: - def __init__(self, net, predictor_params, update_image_callback, prob_thresh=0.5): - self.net = net - self.prob_thresh = prob_thresh - self.clicker = clicker.Clicker() - self.states = [] - self.probs_history = [] - self.object_count = 0 - self._result_mask = None - self._init_mask = None - - self.image = None - self.predictor = None - self.update_image_callback = update_image_callback - self.predictor_params = predictor_params - self.reset_predictor() - - def set_image(self, image): - # input_transform = T.Compose([T.Normalize(mean=[.485, .456, .406], std=[.229, .224, .225])], to_rgb=False) - self.image = image - # self.image_nd = input_transform(image)[0] - self._result_mask = np.zeros(image.shape[:2], dtype=np.uint16) - self.object_count = 0 - self.reset_last_object(update_image=False) - self.update_image_callback(reset_canvas=True) - - def set_mask(self, mask): - if self.image.shape[:2] != mask.shape[:2]: - messagebox.showwarning( - "Warning", - "A segmentation mask must have the same sizes as the current image!", - ) - return - - if len(self.probs_history) > 0: - self.reset_last_object() - - self._init_mask = mask.astype(np.float32) - self.probs_history.append((np.zeros_like(self._init_mask), self._init_mask)) - self._init_mask = paddle.to_tensor(self._init_mask).unsqueeze(0).unsqueeze(0) - self.clicker.click_indx_offset = 1 - - def add_click(self, x, y, is_positive): - self.states.append( - { - "clicker": self.clicker.get_state(), - "predictor": self.predictor.get_states(), - } - ) - - click = clicker.Click(is_positive=is_positive, coords=(y, x)) - self.clicker.add_click(click) - start = time.time() - pred = self.predictor.get_prediction(self.clicker, prev_mask=self._init_mask) - if self._init_mask is not None and len(self.clicker) == 1: - pred = self.predictor.get_prediction( - self.clicker, prev_mask=self._init_mask - ) - end = time.time() - print("cost time", end - start) - - if self.probs_history: - self.probs_history.append((self.probs_history[-1][0], pred)) - else: - 
self.probs_history.append((np.zeros_like(pred), pred)) - - self.update_image_callback() - - def undo_click(self): - if not self.states: - return - - prev_state = self.states.pop() - self.clicker.set_state(prev_state["clicker"]) - self.predictor.set_states(prev_state["predictor"]) - self.probs_history.pop() - if not self.probs_history: - self.reset_init_mask() - self.update_image_callback() - - def partially_finish_object(self): - object_prob = self.current_object_prob - if object_prob is None: - return - - self.probs_history.append((object_prob, np.zeros_like(object_prob))) - self.states.append(self.states[-1]) - - self.clicker.reset_clicks() - self.reset_predictor() - self.reset_init_mask() - self.update_image_callback() - - def finish_object(self): - if self.current_object_prob is None: - return - - self._result_mask = self.result_mask - self.object_count += 1 - self.reset_last_object() - - def reset_last_object(self, update_image=True): - self.states = [] - self.probs_history = [] - self.clicker.reset_clicks() - self.reset_predictor() - self.reset_init_mask() - if update_image: - self.update_image_callback() - - def reset_predictor(self, predictor_params=None): - if predictor_params is not None: - self.predictor_params = predictor_params - self.predictor = get_predictor(self.net, **self.predictor_params) - if self.image is not None: - self.predictor.set_input_image(self.image) - - def reset_init_mask(self): - self._init_mask = None - self.clicker.click_indx_offset = 0 - - @property - def current_object_prob(self): - if self.probs_history: - current_prob_total, current_prob_additive = self.probs_history[-1] - return np.maximum(current_prob_total, current_prob_additive) - else: - return None - - @property - def is_incomplete_mask(self): - return len(self.probs_history) > 0 - - @property - def result_mask(self): - result_mask = self._result_mask.copy() - if self.probs_history: - result_mask[self.current_object_prob > self.prob_thresh] = ( - self.object_count + 1 - ) - return result_mask - - def get_visualization(self, alpha_blend, click_radius): - if self.image is None: - return None - - results_mask_for_vis = self.result_mask - vis = draw_with_blend_and_clicks( - self.image, - mask=results_mask_for_vis, - alpha=alpha_blend, - clicks_list=self.clicker.clicks_list, - radius=click_radius, - ) - if self.probs_history: - total_mask = self.probs_history[-1][0] > self.prob_thresh - results_mask_for_vis[np.logical_not(total_mask)] = 0 - vis = draw_with_blend_and_clicks( - vis, mask=results_mask_for_vis, alpha=alpha_blend - ) - - return vis diff --git a/contrib/EISeg/eiseg/data/base.py b/contrib/EISeg/eiseg/data/base.py deleted file mode 100644 index 67a8d602cb..0000000000 --- a/contrib/EISeg/eiseg/data/base.py +++ /dev/null @@ -1,109 +0,0 @@ -import random -import pickle - -import cv2 -import numpy as np -import paddle - -import paddleseg.transforms as T -from .points_sampler import MultiPointSampler - - -def get_unique_labels(x, exclude_zero=False): - obj_sizes = np.bincount(x.flatten()) - labels = np.nonzero(obj_sizes)[0].tolist() - - if exclude_zero: - labels = [x for x in labels if x != 0] - return labels - - -class ISDataset(paddle.io.Dataset): - def __init__(self, - augmentator=None, - points_sampler=MultiPointSampler(max_num_points=12), - min_object_area=0, - min_ignore_object_area=10, - keep_background_prob=0.0, - with_image_info=False, - samples_scores_path=None, - samples_scores_gamma=1.0, - epoch_len=-1): - super(ISDataset, self).__init__() - self.epoch_len = epoch_len - 
self.augmentator = augmentator - self.min_object_area = min_object_area - self.keep_background_prob = keep_background_prob - self.points_sampler = points_sampler - self.with_image_info = with_image_info - self.samples_precomputed_scores = self._load_samples_scores(samples_scores_path, samples_scores_gamma) - self.dataset_samples = None - - def to_tensor(self, x): - if isinstance(x, np.ndarray): - if x.ndim == 2: - x = x[:,:,None] - #img = paddle.to_tensor(x.transpose([2,0,1])).astype('float32') / 255 - img = x.transpose([2,0,1]).astype(np.float32) / 255 - return img - - def __getitem__(self, index): - -# if self.samples_precomputed_scores is not None: -# index = np.random.choice(self.samples_precomputed_scores['indices'], -# p=self.samples_precomputed_scores['probs']) -# else: -# if self.epoch_len > 0: -# index = random.randrange(0, len(self.dataset_samples)) - sample = self.get_sample(index) - sample = self.augment_sample(sample) - sample.remove_small_objects(self.min_object_area) - self.points_sampler.sample_object(sample) - points = np.array(self.points_sampler.sample_points()).astype(np.float32) - mask = self.points_sampler.selected_mask - image = self.to_tensor(sample.image) - ids = sample.sample_id - - return image, points, mask - - def augment_sample(self, sample): - if self.augmentator is None: - return sample - - valid_augmentation = False - while not valid_augmentation: - sample.augment(self.augmentator) - keep_sample = (self.keep_background_prob < 0.0 or - random.random() < self.keep_background_prob) - valid_augmentation = len(sample) > 0 or keep_sample - - return sample - - def get_sample(self, index): - raise NotImplementedError - - def __len__(self): - if self.epoch_len > 0: - return self.epoch_len - else: - return self.get_samples_number() - - def get_samples_number(self): - return len(self.dataset_samples) - - @staticmethod - def _load_samples_scores(samples_scores_path, samples_scores_gamma): - if samples_scores_path is None: - return None - - with open(samples_scores_path, 'rb') as f: - images_scores = pickle.load(f) - - probs = np.array([(1.0 - x[2]) ** samples_scores_gamma for x in images_scores]) - probs /= probs.sum() - samples_scores = { - 'indices': [x[0] for x in images_scores], - 'probs': probs - } - print(f'Loaded {len(probs)} weights with gamma={samples_scores_gamma}') - return samples_scores diff --git a/contrib/EISeg/eiseg/data/compose.py b/contrib/EISeg/eiseg/data/compose.py deleted file mode 100644 index 0659594cd3..0000000000 --- a/contrib/EISeg/eiseg/data/compose.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np -from math import isclose -from .base import ISDataset - - -class ComposeDataset(ISDataset): - def __init__(self, datasets, **kwargs): - super(ComposeDataset, self).__init__(**kwargs) - - self._datasets = datasets - self.dataset_samples = [] - for dataset_indx, dataset in enumerate(self._datasets): - self.dataset_samples.extend([(dataset_indx, i) for i in range(len(dataset))]) - - def get_sample(self, index): - dataset_indx, sample_indx = self.dataset_samples[index] - return self._datasets[dataset_indx].get_sample(sample_indx) - - -class ProportionalComposeDataset(ISDataset): - def __init__(self, datasets, ratios, **kwargs): - super().__init__(**kwargs) - - assert len(ratios) == len(datasets),\ - "The number of datasets must match the number of ratios" - assert isclose(sum(ratios), 1.0),\ - "The sum of ratios must be equal to 1" - - self._ratios = ratios - self._datasets = datasets - self.dataset_samples = [] - for dataset_indx, dataset in 
enumerate(self._datasets): - self.dataset_samples.extend([(dataset_indx, i) for i in range(len(dataset))]) - - def get_sample(self, index): - dataset_indx = np.random.choice(len(self._datasets), p=self._ratios) - sample_indx = np.random.choice(len(self._datasets[dataset_indx])) - - return self._datasets[dataset_indx].get_sample(sample_indx) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/__init__.py b/contrib/EISeg/eiseg/data/datasets/__init__.py deleted file mode 100644 index 13115ac729..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from data.compose import ComposeDataset, ProportionalComposeDataset -from .berkeley import BerkeleyDataset -from .coco import CocoDataset -from .davis import DavisDataset -from .grabcut import GrabCutDataset -from .coco_lvis import CocoLvisDataset -from .lvis import LvisDataset -from .openimages import OpenImagesDataset -from .sbd import SBDDataset, SBDEvaluationDataset -from .images_dir import ImagesDirDataset -from .ade20k import ADE20kDataset -from .pascalvoc import PascalVocDataset -from .human import HumanDataset \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/ade20k.py b/contrib/EISeg/eiseg/data/datasets/ade20k.py deleted file mode 100644 index d9fa49dab8..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/ade20k.py +++ /dev/null @@ -1,55 +0,0 @@ -import os -import random -import pickle as pkl -from pathlib import Path - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample -from util.misc import get_labels_with_sizes - - -class ADE20kDataset(ISDataset): - def __init__(self, dataset_path, split='train', stuff_prob=0.0, **kwargs): - super().__init__(**kwargs) - assert split in {'train', 'val'} - - self.dataset_path = Path(dataset_path) - self.dataset_split = split - self.dataset_split_folder = 'training' if split == 'train' else 'validation' - self.stuff_prob = stuff_prob - - anno_path = self.dataset_path / f'{split}-annotations-object-segmentation.pkl' - if os.path.exists(anno_path): - with anno_path.open('rb') as f: - annotations = pkl.load(f) - else: - raise RuntimeError(f"Can't find annotations at {anno_path}") - self.annotations = annotations - self.dataset_samples = list(annotations.keys()) - - def get_sample(self, index) -> DSample: - image_id = self.dataset_samples[index] - sample_annos = self.annotations[image_id] - - image_path = str(self.dataset_path / sample_annos['folder'] / f'{image_id}.jpg') - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - # select random mask for an image - layer = random.choice(sample_annos['layers']) - mask_path = str(self.dataset_path / sample_annos['folder'] / layer['mask_name']) - instances_mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)[:, :, 0] # the B channel holds instances - instances_mask = instances_mask.astype(np.int32) - object_ids, _ = get_labels_with_sizes(instances_mask) - - if (self.stuff_prob <= 0) or (random.random() > self.stuff_prob): - # remove stuff objects - for i, object_id in enumerate(object_ids): - if i in layer['stuff_instances']: - instances_mask[instances_mask == object_id] = 0 - object_ids, _ = get_labels_with_sizes(instances_mask) - - return DSample(image, instances_mask, objects_ids=object_ids, sample_id=index) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/berkeley.py b/contrib/EISeg/eiseg/data/datasets/berkeley.py deleted file mode 100644 index 5c269d84af..0000000000 --- 
a/contrib/EISeg/eiseg/data/datasets/berkeley.py +++ /dev/null @@ -1,6 +0,0 @@ -from .grabcut import GrabCutDataset - - -class BerkeleyDataset(GrabCutDataset): - def __init__(self, dataset_path, **kwargs): - super().__init__(dataset_path, images_dir_name='images', masks_dir_name='masks', **kwargs) diff --git a/contrib/EISeg/eiseg/data/datasets/coco.py b/contrib/EISeg/eiseg/data/datasets/coco.py deleted file mode 100644 index 5e3eb05e05..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/coco.py +++ /dev/null @@ -1,75 +0,0 @@ -import cv2 -import json -import random -import numpy as np -from pathlib import Path -from data.base import ISDataset -from data.sample import DSample - - -class CocoDataset(ISDataset): - def __init__(self, dataset_path, split='train', stuff_prob=0.0, **kwargs): - super(CocoDataset, self).__init__(**kwargs) - self.split = split - self.dataset_path = Path(dataset_path) - self.stuff_prob = stuff_prob - - self.load_samples() - - def load_samples(self): - annotation_path = self.dataset_path / 'annotations' / f'panoptic_{self.split}.json' - self.labels_path = self.dataset_path / 'annotations' / f'panoptic_{self.split}' - self.images_path = self.dataset_path / self.split - - with open(annotation_path, 'r') as f: - annotation = json.load(f) - - self.dataset_samples = annotation['annotations'] - - self._categories = annotation['categories'] - self._stuff_labels = [x['id'] for x in self._categories if x['isthing'] == 0] - self._things_labels = [x['id'] for x in self._categories if x['isthing'] == 1] - self._things_labels_set = set(self._things_labels) - self._stuff_labels_set = set(self._stuff_labels) - - def get_sample(self, index) -> DSample: - dataset_sample = self.dataset_samples[index] - - image_path = self.images_path / self.get_image_name(dataset_sample['file_name']) - label_path = self.labels_path / dataset_sample['file_name'] - - image = cv2.imread(str(image_path)) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - label = cv2.imread(str(label_path), cv2.IMREAD_UNCHANGED).astype(np.int32) - # 这个是什么处理呢 - label = 256 * 256 * label[:, :, 0] + 256 * label[:, :, 1] + label[:, :, 2] - - instance_map = np.full_like(label, 0) - things_ids = [] - stuff_ids = [] - - for segment in dataset_sample['segments_info']: - class_id = segment['category_id'] - obj_id = segment['id'] - if class_id in self._things_labels_set: - if segment['iscrowd'] == 1: - continue - things_ids.append(obj_id) - else: - stuff_ids.append(obj_id) - - instance_map[label == obj_id] = obj_id - - if self.stuff_prob > 0 and random.random() < self.stuff_prob: - instances_ids = things_ids + stuff_ids - else: - instances_ids = things_ids - - for stuff_id in stuff_ids: - instance_map[instance_map == stuff_id] = 0 - - return DSample(image, instance_map, objects_ids=instances_ids) - - @classmethod - def get_image_name(cls, panoptic_name): - return panoptic_name.replace('.png', '.jpg') diff --git a/contrib/EISeg/eiseg/data/datasets/coco_lvis.py b/contrib/EISeg/eiseg/data/datasets/coco_lvis.py deleted file mode 100644 index ae06dff23e..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/coco_lvis.py +++ /dev/null @@ -1,69 +0,0 @@ -from pathlib import Path -import pickle -import random -import numpy as np -import json -import cv2 -from copy import deepcopy -from data.base import ISDataset -from data.sample import DSample - - -class CocoLvisDataset(ISDataset): - def __init__(self, dataset_path, split='train', stuff_prob=0.0, - allow_list_name=None, anno_file='hannotation.pickle', **kwargs): - super(CocoLvisDataset, 
self).__init__(**kwargs) - dataset_path = Path(dataset_path) - self._split_path = dataset_path / split - self.split = split - self._images_path = self._split_path / 'images' - self._masks_path = self._split_path / 'masks' - self.stuff_prob = stuff_prob - - with open(self._split_path / anno_file, 'rb') as f: - self.dataset_samples = sorted(pickle.load(f).items()) - - if allow_list_name is not None: - allow_list_path = self._split_path / allow_list_name - with open(allow_list_path, 'r') as f: - allow_images_ids = json.load(f) - allow_images_ids = set(allow_images_ids) - - self.dataset_samples = [sample for sample in self.dataset_samples - if sample[0] in allow_images_ids] - - def get_sample(self, index) -> DSample: - # Read out all the masks, then process each label separately so that every instance gets its own binary mask (object pixels set, everything else 0). - image_id, sample = self.dataset_samples[index] - image_path = self._images_path / f'{image_id}.jpg' - - image = cv2.imread(str(image_path)) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - packed_masks_path = self._masks_path / f'{image_id}.pickle' - with open(packed_masks_path, 'rb') as f: - encoded_layers, objs_mapping = pickle.load(f) - layers = [cv2.imdecode(x, cv2.IMREAD_UNCHANGED) for x in encoded_layers] - layers = np.stack(layers, axis=2) - - instances_info = deepcopy(sample['hierarchy']) - for inst_id, inst_info in list(instances_info.items()): - if inst_info is None: - inst_info = {'children': [], 'parent': None, 'node_level': 0} - instances_info[inst_id] = inst_info - inst_info['mapping'] = objs_mapping[inst_id] - - if self.stuff_prob > 0 and random.random() < self.stuff_prob: - for inst_id in range(sample['num_instance_masks'], len(objs_mapping)): - instances_info[inst_id] = { - 'mapping': objs_mapping[inst_id], - 'parent': None, - 'children': [] - } - else: - # The mask has 4 layers; each layer records which mask ids appear on that layer. - for inst_id in range(sample['num_instance_masks'], len(objs_mapping)): - layer_indx, mask_id = objs_mapping[inst_id] - layers[:, :, layer_indx][layers[:, :, layer_indx] == mask_id] = 0 - - return DSample(image, layers, objects=instances_info) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/davis.py b/contrib/EISeg/eiseg/data/datasets/davis.py deleted file mode 100644 index c5ad5b935f..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/davis.py +++ /dev/null @@ -1,33 +0,0 @@ -from pathlib import Path - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class DavisDataset(ISDataset): - def __init__(self, dataset_path, - images_dir_name='img', masks_dir_name='gt', - **kwargs): - super(DavisDataset, self).__init__(**kwargs) - - self.dataset_path = Path(dataset_path) - self._images_path = self.dataset_path / images_dir_name - self._insts_path = self.dataset_path / masks_dir_name - - self.dataset_samples = [x.name for x in sorted(self._images_path.glob('*.*'))] - self._masks_paths = {x.stem: x for x in self._insts_path.glob('*.*')} - - def get_sample(self, index) -> DSample: - image_name = self.dataset_samples[index] - image_path = str(self._images_path / image_name) - mask_path = str(self._masks_paths[image_name.split('.')[0]]) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - instances_mask = np.max(cv2.imread(mask_path).astype(np.int32), axis=2) - instances_mask[instances_mask > 0] = 1 - - return DSample(image, instances_mask, objects_ids=[1], sample_id=index) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/grabcut.py
b/contrib/EISeg/eiseg/data/datasets/grabcut.py deleted file mode 100644 index 662142d637..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/grabcut.py +++ /dev/null @@ -1,34 +0,0 @@ -from pathlib import Path - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class GrabCutDataset(ISDataset): - def __init__(self, dataset_path, - images_dir_name='data_GT', masks_dir_name='boundary_GT', - **kwargs): - super(GrabCutDataset, self).__init__(**kwargs) - - self.dataset_path = Path(dataset_path) - self._images_path = self.dataset_path / images_dir_name - self._insts_path = self.dataset_path / masks_dir_name - - self.dataset_samples = [x.name for x in sorted(self._images_path.glob('*.*'))] - self._masks_paths = {x.stem: x for x in self._insts_path.glob('*.*')} - - def get_sample(self, index) -> DSample: - image_name = self.dataset_samples[index] - image_path = str(self._images_path / image_name) - mask_path = str(self._masks_paths[image_name.split('.')[0]]) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - instances_mask = cv2.imread(mask_path)[:, :, 0].astype(np.int32) - instances_mask[instances_mask == 128] = -1 - instances_mask[instances_mask > 128] = 1 - - return DSample(image, instances_mask, objects_ids=[1], ignore_ids=[-1], sample_id=index) diff --git a/contrib/EISeg/eiseg/data/datasets/human.py b/contrib/EISeg/eiseg/data/datasets/human.py deleted file mode 100644 index afef7302a1..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/human.py +++ /dev/null @@ -1,51 +0,0 @@ -from pathlib import Path - -import os -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class HumanDataset(ISDataset): - def __init__(self, dataset_path, - split = 'train', - **kwargs): - super(HumanDataset, self).__init__(**kwargs) - - self.mode = split.lower() - self.path = dataset_path - - if self.mode == 'train': - file_path = os.path.join(self.path, 'train_mini.txt') - else: - file_path = os.path.join(self.path, 'val_mini.txt') - - self.dataset_samples = [] - with open(file_path, 'r') as f: - for line in f.readlines(): - line = line.strip() - if line != '': - self.dataset_samples.append(line) - - def get_sample(self, index): - items = self.dataset_samples[index].split(' ') - if 'person_detection__ds' in items[0]: - image_path, image_name = items[0].rsplit('/', 1) - items[0] = image_path.rsplit('/', 1)[0] + '/' + image_name - mask_path, mask_name = items[1].rsplit('/', 1) - items[1] = mask_path.rsplit('/', 1)[0] + '/' + mask_name - - - image_path = os.path.join(self.path, items[0]) - mask_path = os.path.join(self.path, items[1]) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - instances_mask = np.max(cv2.imread(mask_path).astype(np.int32), axis=2) - instances_mask[instances_mask > 0] = 1 - - return DSample(image, instances_mask, objects_ids=[1], sample_id=index) - diff --git a/contrib/EISeg/eiseg/data/datasets/images_dir.py b/contrib/EISeg/eiseg/data/datasets/images_dir.py deleted file mode 100644 index 8e809ec002..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/images_dir.py +++ /dev/null @@ -1,59 +0,0 @@ -import cv2 -import numpy as np -from pathlib import Path - -from data.base import ISDataset -from data.sample import DSample - - -class ImagesDirDataset(ISDataset): - def __init__(self, dataset_path, - images_dir_name='images', masks_dir_name='masks', - **kwargs): - super(ImagesDirDataset, self).__init__(**kwargs) - - 
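# [Editor's illustrative sketch, not part of the original patch.]
# The GrabCut loader removed just above encodes its raw single-channel masks as
# 128 -> ignore (-1) and values above 128 -> foreground (1); the images_dir
# loader deleted below applies the same idea with ignore id 2. A minimal
# standalone sketch of the GrabCut variant (NumPy only; the input array here is
# synthetic):
import numpy as np

def encode_grabcut_trimap(raw_mask):
    # Boundary pixels (exactly 128) become ignore, brighter pixels become
    # foreground, everything else stays background.
    labels = np.zeros(raw_mask.shape, dtype=np.int32)
    labels[raw_mask == 128] = -1
    labels[raw_mask > 128] = 1
    return labels

# Toy usage with a synthetic 2x3 mask.
raw = np.array([[0, 128, 255],
                [64, 200, 0]], dtype=np.uint8)
print(encode_grabcut_trimap(raw))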
self.dataset_path = Path(dataset_path) - self._images_path = self.dataset_path / images_dir_name - self._insts_path = self.dataset_path / masks_dir_name - - images_list = [x for x in sorted(self._images_path.glob('*.*'))] - - samples = {x.stem: {'image': x, 'masks': []} for x in images_list} - for mask_path in self._insts_path.glob('*.*'): - mask_name = mask_path.stem - if mask_name in samples: - samples[mask_name]['masks'].append(mask_path) - continue - - mask_name_split = mask_name.split('_') - if mask_name_split[-1].isdigit(): - mask_name = '_'.join(mask_name_split[:-1]) - assert mask_name in samples - samples[mask_name]['masks'].append(mask_path) - - for x in samples.values(): - assert len(x['masks']) > 0, x['image'] - - self.dataset_samples = [v for k, v in sorted(samples.items())] - - def get_sample(self, index) -> DSample: - sample = self.dataset_samples[index] - image_path = str(sample['image']) - - objects = [] - ignored_regions = [] - masks = [] - for indx, mask_path in enumerate(sample['masks']): - gt_mask = cv2.imread(str(mask_path))[:, :, 0].astype(np.int32) - instances_mask = np.zeros_like(gt_mask) - instances_mask[gt_mask == 128] = 2 - instances_mask[gt_mask > 128] = 1 - masks.append(instances_mask) - objects.append((indx, 1)) - ignored_regions.append((indx, 2)) - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - return DSample(image, np.stack(masks, axis=2), - objects_ids=objects, ignore_ids=ignored_regions, sample_id=index) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/lvis.py b/contrib/EISeg/eiseg/data/datasets/lvis.py deleted file mode 100644 index 6d21ba5a7f..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/lvis.py +++ /dev/null @@ -1,97 +0,0 @@ -import json -import random -from collections import defaultdict -from pathlib import Path - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class LvisDataset(ISDataset): - def __init__(self, dataset_path, split='train', - max_overlap_ratio=0.5, - **kwargs): - super(LvisDataset, self).__init__(**kwargs) - dataset_path = Path(dataset_path) - train_categories_path = dataset_path / 'train_categories.json' - self._train_path = dataset_path / 'train' - self._val_path = dataset_path / 'val' - - self.split = split - self.max_overlap_ratio = max_overlap_ratio - - with open( dataset_path / split / f'lvis_{self.split}.json', 'r') as f: - json_annotation = json.loads(f.read()) - - self.annotations = defaultdict(list) - for x in json_annotation['annotations']: - self.annotations[x['image_id']].append(x) - - if not train_categories_path.exists(): - self.generate_train_categories(dataset_path, train_categories_path) - self.dataset_samples = [x for x in json_annotation['images'] - if len(self.annotations[x['id']]) > 0] - - def get_sample(self, index) -> DSample: - image_info = self.dataset_samples[index] - image_id, image_url = image_info['id'], image_info['coco_url'] - image_filename = image_url.split('/')[-1] - image_annotations = self.annotations[image_id] - random.shuffle(image_annotations) - - # LVISv1 splits do not match older LVIS splits (some images in val may come from COCO train2017) - if 'train2017' in image_url: - image_path = self._train_path / 'images' / image_filename - else: - image_path = self._val_path / 'images' / image_filename - image = cv2.imread(str(image_path)) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - instances_mask = None - instances_area = defaultdict(int) - objects_ids = [] - for 
indx, obj_annotation in enumerate(image_annotations): - mask = self.get_mask_from_polygon(obj_annotation, image) - object_mask = mask > 0 - object_area = object_mask.sum() - - if instances_mask is None: - instances_mask = np.zeros_like(object_mask, dtype=np.int32) - - overlap_ids = np.bincount(instances_mask[object_mask].flatten()) - overlap_areas = [overlap_area / instances_area[inst_id] for inst_id, overlap_area in enumerate(overlap_ids) - if overlap_area > 0 and inst_id > 0] - overlap_ratio = np.logical_and(object_mask, instances_mask > 0).sum() / object_area - if overlap_areas: - overlap_ratio = max(overlap_ratio, max(overlap_areas)) - if overlap_ratio > self.max_overlap_ratio: - continue - - instance_id = indx + 1 - instances_mask[object_mask] = instance_id - instances_area[instance_id] = object_area - objects_ids.append(instance_id) - - return DSample(image, instances_mask, objects_ids=objects_ids) - - - @staticmethod - def get_mask_from_polygon(annotation, image): - mask = np.zeros(image.shape[:2], dtype=np.int32) - for contour_points in annotation['segmentation']: - contour_points = np.array(contour_points).reshape((-1, 2)) - contour_points = np.round(contour_points).astype(np.int32)[np.newaxis, :] - cv2.fillPoly(mask, contour_points, 1) - - return mask - - @staticmethod - def generate_train_categories(dataset_path, train_categories_path): - with open(dataset_path / 'train/lvis_train.json', 'r') as f: - annotation = json.load(f) - - with open(train_categories_path, 'w') as f: - json.dump(annotation['categories'], f, indent=1) diff --git a/contrib/EISeg/eiseg/data/datasets/openimages.py b/contrib/EISeg/eiseg/data/datasets/openimages.py deleted file mode 100644 index 4a9360b0f6..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/openimages.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import random -import pickle as pkl -from pathlib import Path - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class OpenImagesDataset(ISDataset): - def __init__(self, dataset_path, split='train', **kwargs): - super().__init__(**kwargs) - assert split in {'train', 'val', 'test'} - - self.dataset_path = Path(dataset_path) - self._split_path = self.dataset_path / split - self._images_path = self._split_path / 'images' - self._masks_path = self._split_path / 'masks' - self.dataset_split = split - - clean_anno_path = self._split_path / f'{split}-annotations-object-segmentation_clean.pkl' - if os.path.exists(clean_anno_path): - with clean_anno_path.open('rb') as f: - annotations = pkl.load(f) - else: - raise RuntimeError(f"Can't find annotations at {clean_anno_path}") - self.image_id_to_masks = annotations['image_id_to_masks'] - self.dataset_samples = annotations['dataset_samples'] - - def get_sample(self, index) -> DSample: - image_id = self.dataset_samples[index] - - image_path = str(self._images_path / f'{image_id}.jpg') - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - - mask_paths = self.image_id_to_masks[image_id] - # select random mask for an image - mask_path = str(self._masks_path / random.choice(mask_paths)) - instances_mask = cv2.imread(mask_path) - instances_mask = cv2.cvtColor(instances_mask, cv2.COLOR_BGR2GRAY) - instances_mask[instances_mask > 0] = 1 - instances_mask = instances_mask.astype(np.int32) - - min_width = min(image.shape[1], instances_mask.shape[1]) - min_height = min(image.shape[0], instances_mask.shape[0]) - - if image.shape[0] != min_height or image.shape[1] != min_width: - image = 
cv2.resize(image, (min_width, min_height), interpolation=cv2.INTER_LINEAR) - if instances_mask.shape[0] != min_height or instances_mask.shape[1] != min_width: - instances_mask = cv2.resize(instances_mask, (min_width, min_height), interpolation=cv2.INTER_NEAREST) - - object_ids = [1] if instances_mask.sum() > 0 else [] - - return DSample(image, instances_mask, objects_ids=object_ids, sample_id=index) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/datasets/pascalvoc.py b/contrib/EISeg/eiseg/data/datasets/pascalvoc.py deleted file mode 100644 index 24bc5743f4..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/pascalvoc.py +++ /dev/null @@ -1,47 +0,0 @@ -import pickle as pkl -from pathlib import Path -from random import choice - -import cv2 -import numpy as np - -from data.base import ISDataset -from data.sample import DSample - - -class PascalVocDataset(ISDataset): - def __init__(self, dataset_path, split='train', **kwargs): - super().__init__(**kwargs) - assert split in {'train', 'val', 'trainval'} - - self.dataset_path = Path(dataset_path) - self._images_path = self.dataset_path / "JPEGImages" - self._insts_path = self.dataset_path / "SegmentationObject" - self.dataset_split = split - with open(self.dataset_path / f'ImageSets/Segmentation/{split}.txt', 'r') as f: - - self.dataset_samples = [name.strip() for name in f.readlines()] - - def get_sample(self, index) -> DSample: - sample_id = self.dataset_samples[index] - image_path = str(self._images_path / f'{sample_id}.jpg') - mask_path = str(self._insts_path / f'{sample_id}.png') - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - instances_mask = cv2.imread(mask_path) - instances_mask = cv2.cvtColor(instances_mask, cv2.COLOR_BGR2GRAY).astype(np.int32) - if self.dataset_split == 'val': - objects_ids = np.unique(instances_mask) - objects_ids = [x for x in objects_ids if x != 0 and x != 220] - instance_id = choice(objects_ids) - mask = np.zeros_like(instances_mask) - mask[instances_mask == 220] = 220 # ignored area - mask[instances_mask == instance_id] = 1 - objects_ids = [1] - instances_mask = mask - else: - objects_ids = np.unique(instances_mask) - objects_ids = [x for x in objects_ids if x != 0 and x != 220] - - return DSample(image, instances_mask, objects_ids=objects_ids, ignore_ids=[220], sample_id=index) diff --git a/contrib/EISeg/eiseg/data/datasets/sbd.py b/contrib/EISeg/eiseg/data/datasets/sbd.py deleted file mode 100644 index 3ed0471a90..0000000000 --- a/contrib/EISeg/eiseg/data/datasets/sbd.py +++ /dev/null @@ -1,111 +0,0 @@ -import pickle as pkl -from pathlib import Path - -import cv2 -import numpy as np -from scipy.io import loadmat - -from util.misc import get_bbox_from_mask, get_labels_with_sizes -from data.base import ISDataset -from data.sample import DSample - - -class SBDDataset(ISDataset): - def __init__(self, dataset_path, split='train', buggy_mask_thresh=0.08, **kwargs): - super(SBDDataset, self).__init__(**kwargs) - assert split in {'train', 'val'} - - self.dataset_path = Path(dataset_path) - self.dataset_split = split - self._images_path = self.dataset_path / 'img' - self._insts_path = self.dataset_path / 'inst' - self._buggy_objects = dict() - self._buggy_mask_thresh = buggy_mask_thresh - - with open(self.dataset_path / f'{split}.txt', 'r') as f: - self.dataset_samples = [x.strip() for x in f.readlines()] - - def get_sample(self, index): - image_name = self.dataset_samples[index] - image_path = str(self._images_path / f'{image_name}.jpg') - inst_info_path = 
str(self._insts_path / f'{image_name}.mat') - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - instances_mask = loadmat(str(inst_info_path))['GTinst'][0][0][0].astype(np.int32) - instances_mask = self.remove_buggy_masks(index, instances_mask) - instances_ids, _ = get_labels_with_sizes(instances_mask) - - return DSample(image, instances_mask, objects_ids=instances_ids, sample_id=index) - - def remove_buggy_masks(self, index, instances_mask): - if self._buggy_mask_thresh > 0.0: - buggy_image_objects = self._buggy_objects.get(index, None) - if buggy_image_objects is None: - buggy_image_objects = [] - instances_ids, _ = get_labels_with_sizes(instances_mask) - for obj_id in instances_ids: - obj_mask = instances_mask == obj_id - mask_area = obj_mask.sum() - bbox = get_bbox_from_mask(obj_mask) - bbox_area = (bbox[1] - bbox[0] + 1) * (bbox[3] - bbox[2] + 1) - obj_area_ratio = mask_area / bbox_area - if obj_area_ratio < self._buggy_mask_thresh: - buggy_image_objects.append(obj_id) - - self._buggy_objects[index] = buggy_image_objects - for obj_id in buggy_image_objects: - instances_mask[instances_mask == obj_id] = 0 - - return instances_mask - - -class SBDEvaluationDataset(ISDataset): - def __init__(self, dataset_path, split='val', **kwargs): - super(SBDEvaluationDataset, self).__init__(**kwargs) - assert split in {'train', 'val'} - - self.dataset_path = Path(dataset_path) - self.dataset_split = split - self._images_path = self.dataset_path / 'img' - self._insts_path = self.dataset_path / 'inst' - - with open(self.dataset_path / f'{split}.txt', 'r') as f: - self.dataset_samples = [x.strip() for x in f.readlines()] - - self.dataset_samples = self.get_sbd_images_and_ids_list() - - def get_sample(self, index) -> DSample: - image_name, instance_id = self.dataset_samples[index] - image_path = str(self._images_path / f'{image_name}.jpg') - inst_info_path = str(self._insts_path / f'{image_name}.mat') - - image = cv2.imread(image_path) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - instances_mask = loadmat(str(inst_info_path))['GTinst'][0][0][0].astype(np.int32) - instances_mask[instances_mask != instance_id] = 0 - instances_mask[instances_mask > 0] = 1 - - return DSample(image, instances_mask, objects_ids=[1], sample_id=index) - - def get_sbd_images_and_ids_list(self): - pkl_path = self.dataset_path / f'{self.dataset_split}_images_and_ids_list.pkl' - - if pkl_path.exists(): - with open(str(pkl_path), 'rb') as fp: - images_and_ids_list = pkl.load(fp) - else: - images_and_ids_list = [] - - for sample in self.dataset_samples: - inst_info_path = str(self._insts_path / f'{sample}.mat') - instances_mask = loadmat(str(inst_info_path))['GTinst'][0][0][0].astype(np.int32) - instances_ids, _ = get_labels_with_sizes(instances_mask) - - for instances_id in instances_ids: - images_and_ids_list.append((sample, instances_id)) - - with open(str(pkl_path), 'wb') as fp: - pkl.dump(images_and_ids_list, fp) - - return images_and_ids_list \ No newline at end of file diff --git a/contrib/EISeg/eiseg/data/points_sampler.py b/contrib/EISeg/eiseg/data/points_sampler.py deleted file mode 100644 index c2b906e69d..0000000000 --- a/contrib/EISeg/eiseg/data/points_sampler.py +++ /dev/null @@ -1,306 +0,0 @@ -import cv2 -import math -import random -import numpy as np -from functools import lru_cache -from .sample import DSample - - -class BasePointSampler: - def __init__(self): - self._selected_mask = None - self._selected_masks = None - - def sample_object(self, sample: DSample): - raise 
NotImplementedError - - def sample_points(self): - raise NotImplementedError - - @property - def selected_mask(self): - assert self._selected_mask is not None - return self._selected_mask - - @selected_mask.setter - def selected_mask(self, mask): - self._selected_mask = mask[np.newaxis, :].astype(np.float32) - - -class MultiPointSampler(BasePointSampler): - def __init__(self, max_num_points, prob_gamma=0.7, expand_ratio=0.1, - positive_erode_prob=0.9, positive_erode_iters=3, - negative_bg_prob=0.1, negative_other_prob=0.4, negative_border_prob=0.5, - merge_objects_prob=0.0, max_num_merged_objects=2, - use_hierarchy=False, soft_targets=False, - first_click_center=False, only_one_first_click=False, - sfc_inner_k=1.7, sfc_full_inner_prob=0.0): - super().__init__() - self.max_num_points = max_num_points - self.expand_ratio = expand_ratio - self.positive_erode_prob = positive_erode_prob - self.positive_erode_iters = positive_erode_iters - self.merge_objects_prob = merge_objects_prob - self.use_hierarchy = use_hierarchy - self.soft_targets = soft_targets - self.first_click_center = first_click_center - self.only_one_first_click = only_one_first_click - self.sfc_inner_k = sfc_inner_k - self.sfc_full_inner_prob = sfc_full_inner_prob - - if max_num_merged_objects == -1: - max_num_merged_objects = max_num_points - self.max_num_merged_objects = max_num_merged_objects - - self.neg_strategies = ['bg', 'other', 'border'] - self.neg_strategies_prob = [negative_bg_prob, negative_other_prob, negative_border_prob] - assert math.isclose(sum(self.neg_strategies_prob), 1.0) - - self._pos_probs = generate_probs(max_num_points, gamma=prob_gamma) - self._neg_probs = generate_probs(max_num_points + 1, gamma=prob_gamma) - self._neg_masks = None - - def sample_object(self, sample: DSample): - if len(sample) == 0: - bg_mask = sample.get_background_mask() - self.selected_mask = np.zeros_like(bg_mask, dtype=np.float32) - self._selected_masks = [[]] - self._neg_masks = {strategy: bg_mask for strategy in self.neg_strategies} - self._neg_masks['required'] = [] - return - - gt_mask, pos_masks, neg_masks = self._sample_mask(sample) - binary_gt_mask = gt_mask > 0.5 if self.soft_targets else gt_mask > 0 - - self.selected_mask = gt_mask - self._selected_masks = pos_masks - - neg_mask_bg = np.logical_not(binary_gt_mask) - neg_mask_border = self._get_border_mask(binary_gt_mask) - if len(sample) <= len(self._selected_masks): - neg_mask_other = neg_mask_bg - else: - neg_mask_other = np.logical_and(np.logical_not(sample.get_background_mask()), - np.logical_not(binary_gt_mask)) - - self._neg_masks = { - 'bg': neg_mask_bg, - 'other': neg_mask_other, - 'border': neg_mask_border, - 'required': neg_masks - } - - def _sample_mask(self, sample: DSample): - root_obj_ids = sample.root_objects - - if len(root_obj_ids) > 1 and random.random() < self.merge_objects_prob: - max_selected_objects = min(len(root_obj_ids), self.max_num_merged_objects) - num_selected_objects = np.random.randint(2, max_selected_objects + 1) - random_ids = random.sample(root_obj_ids, num_selected_objects) - else: - random_ids = [random.choice(root_obj_ids)] - - gt_mask = None - pos_segments = [] - neg_segments = [] - for obj_id in random_ids: - obj_gt_mask, obj_pos_segments, obj_neg_segments = self._sample_from_masks_layer(obj_id, sample) - if gt_mask is None: - gt_mask = obj_gt_mask - else: - gt_mask = np.maximum(gt_mask, obj_gt_mask) - - pos_segments.extend(obj_pos_segments) - neg_segments.extend(obj_neg_segments) - - pos_masks = [self._positive_erode(x) for x in 
pos_segments] - neg_masks = [self._positive_erode(x) for x in neg_segments] - - return gt_mask, pos_masks, neg_masks - - def _sample_from_masks_layer(self, obj_id, sample: DSample): - objs_tree = sample._objects - - if not self.use_hierarchy: - node_mask = sample.get_object_mask(obj_id) - gt_mask = sample.get_soft_object_mask(obj_id) if self.soft_targets else node_mask - return gt_mask, [node_mask], [] - - def _select_node(node_id): - node_info = objs_tree[node_id] - if not node_info['children'] or random.random() < 0.5: - return node_id - return _select_node(random.choice(node_info['children'])) - - selected_node = _select_node(obj_id) - node_info = objs_tree[selected_node] - node_mask = sample.get_object_mask(selected_node) - gt_mask = sample.get_soft_object_mask(selected_node) if self.soft_targets else node_mask - pos_mask = node_mask.copy() - - negative_segments = [] - if node_info['parent'] is not None and node_info['parent'] in objs_tree: - parent_mask = sample.get_object_mask(node_info['parent']) - negative_segments.append(np.logical_and(parent_mask, np.logical_not(node_mask))) - - for child_id in node_info['children']: - if objs_tree[child_id]['area'] / node_info['area'] < 0.10: - child_mask = sample.get_object_mask(child_id) - pos_mask = np.logical_and(pos_mask, np.logical_not(child_mask)) - - if node_info['children']: - max_disabled_children = min(len(node_info['children']), 3) - num_disabled_children = np.random.randint(0, max_disabled_children + 1) - disabled_children = random.sample(node_info['children'], num_disabled_children) - - for child_id in disabled_children: - child_mask = sample.get_object_mask(child_id) - pos_mask = np.logical_and(pos_mask, np.logical_not(child_mask)) - if self.soft_targets: - soft_child_mask = sample.get_soft_object_mask(child_id) - gt_mask = np.minimum(gt_mask, 1.0 - soft_child_mask) - else: - gt_mask = np.logical_and(gt_mask, np.logical_not(child_mask)) - negative_segments.append(child_mask) - - return gt_mask, [pos_mask], negative_segments - - def sample_points(self): - assert self._selected_mask is not None - pos_points = self._multi_mask_sample_points(self._selected_masks, - is_negative=[False] * len(self._selected_masks), - with_first_click=self.first_click_center) - - neg_strategy = [(self._neg_masks[k], prob) - for k, prob in zip(self.neg_strategies, self.neg_strategies_prob)] - neg_masks = self._neg_masks['required'] + [neg_strategy] - neg_points = self._multi_mask_sample_points(neg_masks, - is_negative=[False] * len(self._neg_masks['required']) + [True]) - - return pos_points + neg_points - - def _multi_mask_sample_points(self, selected_masks, is_negative, with_first_click=False): - selected_masks = selected_masks[:self.max_num_points] - - each_obj_points = [ - self._sample_points(mask, is_negative=is_negative[i], - with_first_click=with_first_click) - for i, mask in enumerate(selected_masks) - ] - each_obj_points = [x for x in each_obj_points if len(x) > 0] - - points = [] - if len(each_obj_points) == 1: - points = each_obj_points[0] - elif len(each_obj_points) > 1: - if self.only_one_first_click: - each_obj_points = each_obj_points[:1] - - points = [obj_points[0] for obj_points in each_obj_points] - - aggregated_masks_with_prob = [] - for indx, x in enumerate(selected_masks): - if isinstance(x, (list, tuple)) and x and isinstance(x[0], (list, tuple)): - for t, prob in x: - aggregated_masks_with_prob.append((t, prob / len(selected_masks))) - else: - aggregated_masks_with_prob.append((x, 1.0 / len(selected_masks))) - - other_points_union = 
self._sample_points(aggregated_masks_with_prob, is_negative=True) - if len(other_points_union) + len(points) <= self.max_num_points: - points.extend(other_points_union) - else: - points.extend(random.sample(other_points_union, self.max_num_points - len(points))) - - if len(points) < self.max_num_points: - points.extend([(-1, -1, -1)] * (self.max_num_points - len(points))) - - return points - - def _sample_points(self, mask, is_negative=False, with_first_click=False): - if is_negative: - num_points = np.random.choice(np.arange(self.max_num_points + 1), p=self._neg_probs) - else: - num_points = 1 + np.random.choice(np.arange(self.max_num_points), p=self._pos_probs) - - indices_probs = None - if isinstance(mask, (list, tuple)): - indices_probs = [x[1] for x in mask] - indices = [(np.argwhere(x), prob) for x, prob in mask] - if indices_probs: - assert math.isclose(sum(indices_probs), 1.0) - else: - indices = np.argwhere(mask) - - points = [] - for j in range(num_points): - first_click = with_first_click and j == 0 and indices_probs is None - - if first_click: - point_indices = get_point_candidates(mask, k=self.sfc_inner_k, full_prob=self.sfc_full_inner_prob) - elif indices_probs: - point_indices_indx = np.random.choice(np.arange(len(indices)), p=indices_probs) - point_indices = indices[point_indices_indx][0] - else: - point_indices = indices - - num_indices = len(point_indices) - #所以最后一个click是用于判断是否是first click - if num_indices > 0: - point_indx = 0 if first_click else 100 - click = point_indices[np.random.randint(0, num_indices)].tolist() + [point_indx] - points.append(click) - - return points - - def _positive_erode(self, mask): - if random.random() > self.positive_erode_prob: - return mask - - kernel = np.ones((3, 3), np.uint8) - eroded_mask = cv2.erode(mask.astype(np.uint8),kernel, iterations=self.positive_erode_iters).astype(np.bool) - - - if eroded_mask.sum() > 10: - return eroded_mask - else: - return mask - - def _get_border_mask(self, mask): - expand_r = int(np.ceil(self.expand_ratio * np.sqrt(mask.sum()))) - kernel = np.ones((3, 3), np.uint8) - expanded_mask = cv2.dilate(mask.astype(np.uint8), kernel, iterations=expand_r) - expanded_mask[mask.astype(np.bool)] = 0 - return expanded_mask - - -@lru_cache(maxsize=None) -def generate_probs(max_num_points, gamma): - probs = [] - last_value = 1 - for i in range(max_num_points): - probs.append(last_value) - last_value *= gamma - - probs = np.array(probs) - probs /= probs.sum() - - return probs - - -def get_point_candidates(obj_mask, k=1.7, full_prob=0.0): - if full_prob > 0 and random.random() < full_prob: - return obj_mask - - padded_mask = np.pad(obj_mask, ((1, 1), (1, 1)), 'constant') - - dt = cv2.distanceTransform(padded_mask.astype(np.uint8), cv2.DIST_L2, 0)[1:-1, 1:-1] - if k > 0: - inner_mask = dt > dt.max() / k - return np.argwhere(inner_mask) - else: - prob_map = dt.flatten() - prob_map /= max(prob_map.sum(), 1e-6) - click_indx = np.random.choice(len(prob_map), p=prob_map) - click_coords = np.unravel_index(click_indx, dt.shape) - return np.array([click_coords]) diff --git a/contrib/EISeg/eiseg/data/sample.py b/contrib/EISeg/eiseg/data/sample.py deleted file mode 100644 index 78237577d9..0000000000 --- a/contrib/EISeg/eiseg/data/sample.py +++ /dev/null @@ -1,148 +0,0 @@ -import numpy as np -from copy import deepcopy -from util.misc import get_labels_with_sizes -from data.transforms import remove_image_only_transforms -from albumentations import ReplayCompose - - -class DSample: - def __init__(self, image, encoded_masks, objects=None, 
- objects_ids=None, ignore_ids=None, sample_id=None): - self.image = image - self.sample_id = sample_id - - if len(encoded_masks.shape) == 2: - encoded_masks = encoded_masks[:, :, np.newaxis] - self._encoded_masks = encoded_masks - self._ignored_regions = [] - - if objects_ids is not None: - if not objects_ids or not isinstance(objects_ids[0], tuple): - assert encoded_masks.shape[2] == 1 - objects_ids = [(0, obj_id) for obj_id in objects_ids] - - self._objects = dict() - for indx, obj_mapping in enumerate(objects_ids): - self._objects[indx] = { - 'parent': None, - 'mapping': obj_mapping, - 'children': [] - } - - if ignore_ids: - if isinstance(ignore_ids[0], tuple): - self._ignored_regions = ignore_ids - else: - self._ignored_regions = [(0, region_id) for region_id in ignore_ids] - else: - self._objects = deepcopy(objects) - - self._augmented = False - self._soft_mask_aug = None - self._original_data = self.image, self._encoded_masks, deepcopy(self._objects) - - def augment(self, augmentator): - self.reset_augmentation() - aug_output = augmentator(image=self.image, mask=self._encoded_masks) - self.image = aug_output['image'] - self._encoded_masks = aug_output['mask'] - - aug_replay = aug_output.get('replay', None) - if aug_replay: - assert len(self._ignored_regions) == 0 - mask_replay = remove_image_only_transforms(aug_replay) - self._soft_mask_aug = ReplayCompose._restore_for_replay(mask_replay) - - self._compute_objects_areas() - self.remove_small_objects(min_area=1) - - self._augmented = True - - def reset_augmentation(self): - if not self._augmented: - return - orig_image, orig_masks, orig_objects = self._original_data - self.image = orig_image - self._encoded_masks = orig_masks - self._objects = deepcopy(orig_objects) - self._augmented = False - self._soft_mask_aug = None - - def remove_small_objects(self, min_area): - if self._objects and not 'area' in list(self._objects.values())[0]: - self._compute_objects_areas() - - for obj_id, obj_info in list(self._objects.items()): - if obj_info['area'] < min_area: - self._remove_object(obj_id) - - def get_object_mask(self, obj_id): - layer_indx, mask_id = self._objects[obj_id]['mapping'] - obj_mask = (self._encoded_masks[:, :, layer_indx] == mask_id).astype(np.int32) - if self._ignored_regions: - for layer_indx, mask_id in self._ignored_regions: - ignore_mask = self._encoded_masks[:, :, layer_indx] == mask_id - obj_mask[ignore_mask] = -1 - - return obj_mask - - def get_soft_object_mask(self, obj_id): - assert self._soft_mask_aug is not None - original_encoded_masks = self._original_data[1] - layer_indx, mask_id = self._objects[obj_id]['mapping'] - obj_mask = (original_encoded_masks[:, :, layer_indx] == mask_id).astype(np.float32) - obj_mask = self._soft_mask_aug(image=obj_mask, mask=original_encoded_masks)['image'] - return np.clip(obj_mask, 0, 1) - - def get_background_mask(self): - return np.max(self._encoded_masks, axis=2) == 0 - - @property - def objects_ids(self): - return list(self._objects.keys()) - - @property - def gt_mask(self): - assert len(self._objects) == 1 - return self.get_object_mask(self.objects_ids[0]) - - @property - def root_objects(self): - return [obj_id for obj_id, obj_info in self._objects.items() if obj_info['parent'] is None] - - def _compute_objects_areas(self): - inverse_index = {node['mapping']: node_id for node_id, node in self._objects.items()} - ignored_regions_keys = set(self._ignored_regions) - - for layer_indx in range(self._encoded_masks.shape[2]): - objects_ids, objects_areas = 
get_labels_with_sizes(self._encoded_masks[:, :, layer_indx]) - for obj_id, obj_area in zip(objects_ids, objects_areas): - inv_key = (layer_indx, obj_id) - if inv_key in ignored_regions_keys: - continue - try: - self._objects[inverse_index[inv_key]]['area'] = obj_area - del inverse_index[inv_key] - except KeyError: - layer = self._encoded_masks[:, :, layer_indx] - layer[layer == obj_id] = 0 - self._encoded_masks[:, :, layer_indx] = layer - - for obj_id in inverse_index.values(): - self._objects[obj_id]['area'] = 0 - - def _remove_object(self, obj_id): - obj_info = self._objects[obj_id] - obj_parent = obj_info['parent'] - for child_id in obj_info['children']: - self._objects[child_id]['parent'] = obj_parent - - if obj_parent is not None: - parent_children = self._objects[obj_parent]['children'] - parent_children = [x for x in parent_children if x != obj_id] - self._objects[obj_parent]['children'] = parent_children + obj_info['children'] - - del self._objects[obj_id] - - def __len__(self): - return len(self._objects) diff --git a/contrib/EISeg/eiseg/data/transforms.py b/contrib/EISeg/eiseg/data/transforms.py deleted file mode 100644 index 5fb7166108..0000000000 --- a/contrib/EISeg/eiseg/data/transforms.py +++ /dev/null @@ -1,178 +0,0 @@ -import cv2 -import random -import numpy as np - -from albumentations.core.serialization import SERIALIZABLE_REGISTRY -from albumentations import ImageOnlyTransform, DualTransform -from albumentations.core.transforms_interface import to_tuple -from albumentations.augmentations import functional as F -from util.misc import get_bbox_from_mask, expand_bbox, clamp_bbox, get_labels_with_sizes - - -class UniformRandomResize(DualTransform): - def __init__(self, scale_range=(0.9, 1.1), interpolation=cv2.INTER_LINEAR, always_apply=False, p=1): - super().__init__(always_apply, p) - self.scale_range = scale_range - self.interpolation = interpolation - - def get_params_dependent_on_targets(self, params): - scale = random.uniform(*self.scale_range) - height = int(round(params['image'].shape[0] * scale)) - width = int(round(params['image'].shape[1] * scale)) - return {'new_height': height, 'new_width': width} - - def apply(self, img, new_height=0, new_width=0, interpolation=cv2.INTER_LINEAR, **params): - return F.resize(img, height=new_height, width=new_width, interpolation=interpolation) - - def apply_to_keypoint(self, keypoint, new_height=0, new_width=0, **params): - scale_x = new_width / params["cols"] - scale_y = new_height / params["rows"] - return F.keypoint_scale(keypoint, scale_x, scale_y) - - def get_transform_init_args_names(self): - return "scale_range", "interpolation" - - @property - def targets_as_params(self): - return ["image"] - - -class ZoomIn(DualTransform): - def __init__( - self, - height, - width, - bbox_jitter=0.1, - expansion_ratio=1.4, - min_crop_size=200, - min_area=100, - always_resize=False, - always_apply=False, - p=0.5, - ): - super(ZoomIn, self).__init__(always_apply, p) - self.height = height - self.width = width - self.bbox_jitter = to_tuple(bbox_jitter) - self.expansion_ratio = expansion_ratio - self.min_crop_size = min_crop_size - self.min_area = min_area - self.always_resize = always_resize - - def apply(self, img, selected_object, bbox, **params): - if selected_object is None: - if self.always_resize: - img = F.resize(img, height=self.height, width=self.width) - return img - - rmin, rmax, cmin, cmax = bbox - img = img[rmin:rmax + 1, cmin:cmax + 1] - img = F.resize(img, height=self.height, width=self.width) - - return img - - def 
apply_to_mask(self, mask, selected_object, bbox, **params): - if selected_object is None: - if self.always_resize: - mask = F.resize(mask, height=self.height, width=self.width, - interpolation=cv2.INTER_NEAREST) - return mask - - rmin, rmax, cmin, cmax = bbox - mask = mask[rmin:rmax + 1, cmin:cmax + 1] - if isinstance(selected_object, tuple): - layer_indx, mask_id = selected_object - obj_mask = mask[:, :, layer_indx] == mask_id - new_mask = np.zeros_like(mask) - new_mask[:, :, layer_indx][obj_mask] = mask_id - else: - obj_mask = mask == selected_object - new_mask = mask.copy() - new_mask[np.logical_not(obj_mask)] = 0 - - new_mask = F.resize(new_mask, height=self.height, width=self.width, - interpolation=cv2.INTER_NEAREST) - return new_mask - - def get_params_dependent_on_targets(self, params): - instances = params['mask'] - - is_mask_layer = len(instances.shape) > 2 - candidates = [] - if is_mask_layer: - for layer_indx in range(instances.shape[2]): - labels, areas = get_labels_with_sizes(instances[:, :, layer_indx]) - candidates.extend([(layer_indx, obj_id) - for obj_id, area in zip(labels, areas) - if area > self.min_area]) - else: - labels, areas = get_labels_with_sizes(instances) - candidates = [obj_id for obj_id, area in zip(labels, areas) - if area > self.min_area] - - selected_object = None - bbox = None - if candidates: - selected_object = random.choice(candidates) - if is_mask_layer: - layer_indx, mask_id = selected_object - obj_mask = instances[:, :, layer_indx] == mask_id - else: - obj_mask = instances == selected_object - - bbox = get_bbox_from_mask(obj_mask) - - if isinstance(self.expansion_ratio, tuple): - expansion_ratio = random.uniform(*self.expansion_ratio) - else: - expansion_ratio = self.expansion_ratio - - bbox = expand_bbox(bbox, expansion_ratio, self.min_crop_size) - bbox = self._jitter_bbox(bbox) - bbox = clamp_bbox(bbox, 0, obj_mask.shape[0] - 1, 0, obj_mask.shape[1] - 1) - - return { - 'selected_object': selected_object, - 'bbox': bbox - } - - def _jitter_bbox(self, bbox): - rmin, rmax, cmin, cmax = bbox - height = rmax - rmin + 1 - width = cmax - cmin + 1 - rmin = int(rmin + random.uniform(*self.bbox_jitter) * height) - rmax = int(rmax + random.uniform(*self.bbox_jitter) * height) - cmin = int(cmin + random.uniform(*self.bbox_jitter) * width) - cmax = int(cmax + random.uniform(*self.bbox_jitter) * width) - - return rmin, rmax, cmin, cmax - - def apply_to_bbox(self, bbox, **params): - raise NotImplementedError - - def apply_to_keypoint(self, keypoint, **params): - raise NotImplementedError - - @property - def targets_as_params(self): - return ["mask"] - - def get_transform_init_args_names(self): - return ("height", "width", "bbox_jitter", - "expansion_ratio", "min_crop_size", "min_area", "always_resize") - - -def remove_image_only_transforms(sdict): - if not 'transforms' in sdict: - return sdict - - keep_transforms = [] - for tdict in sdict['transforms']: - cls = SERIALIZABLE_REGISTRY[tdict['__class_fullname__']] - if 'transforms' in tdict: - keep_transforms.append(remove_image_only_transforms(tdict)) - elif not issubclass(cls, ImageOnlyTransform): - keep_transforms.append(tdict) - sdict['transforms'] = keep_transforms - - return sdict \ No newline at end of file diff --git a/contrib/EISeg/eiseg/inference/evaluation.py b/contrib/EISeg/eiseg/inference/evaluation.py deleted file mode 100644 index 0426b6e373..0000000000 --- a/contrib/EISeg/eiseg/inference/evaluation.py +++ /dev/null @@ -1,53 +0,0 @@ -from time import time -import numpy as np -import paddle -from 
.utils import * -from inference.clicker import Clicker -from tqdm import tqdm -import scipy.misc as sm -import os - - -def evaluate_dataset(dataset, predictor, oracle_eval=False, **kwargs): - all_ious = [] - - start_time = time() - for index in tqdm(range(len(dataset)), leave=False): - sample = dataset.get_sample(index) - _, sample_ious, _ = evaluate_sample(sample.image, sample.gt_mask, predictor, - sample_id=index, **kwargs) - all_ious.append(sample_ious) - - end_time = time() - elapsed_time = end_time - start_time - - return all_ious, elapsed_time - - -def evaluate_sample(image, gt_mask, predictor, max_iou_thr, - pred_thr=0.49, min_clicks=1, max_clicks=20, - sample_id=None, callback=None): - clicker = Clicker(gt_mask=gt_mask) - pred_mask = np.zeros_like(gt_mask) - ious_list = [] - - predictor.set_input_image(image) - pred_probs = None - - for click_indx in range(max_clicks): - clicker.make_next_click(pred_mask) - pred_probs = predictor.get_prediction(clicker) - pred_mask = pred_probs > pred_thr - - if callback is not None: - callback(image, gt_mask, pred_probs, sample_id, click_indx, clicker.clicks_list) - - iou = get_iou(gt_mask, pred_mask) - ious_list.append(iou) - - if iou >= max_iou_thr and click_indx + 1 >= min_clicks: - # image_name = str(time()) + '.png' - # sm.imsave(os.path.join('result', image_name), pred_probs) - break - - return clicker.clicks_list, np.array(ious_list, dtype=np.float32), pred_probs diff --git a/contrib/EISeg/eiseg/inference/predictor/__init__.py b/contrib/EISeg/eiseg/inference/predictor/__init__.py deleted file mode 100644 index 28386d64f0..0000000000 --- a/contrib/EISeg/eiseg/inference/predictor/__init__.py +++ /dev/null @@ -1,97 +0,0 @@ -from paddle.fluid.layers.rnn import dynamic_decode -from .base import BasePredictor -from .brs import InputBRSPredictor, FeatureBRSPredictor, HRNetFeatureBRSPredictor -from .brs_functors import InputOptimizer, ScaleBiasOptimizer -from inference.transforms import ZoomIn -from model.is_hrnet_model import HRNetModel - - -def get_predictor(net, brs_mode, - prob_thresh=0.49, - with_flip=True, - zoom_in_params=dict(), - predictor_params=None, - brs_opt_func_params=None, - lbfgs_params=None): - - lbfgs_params_ = { - 'm': 20, - 'factr': 0, - 'pgtol': 1e-8, - 'maxfun': 20, - } - - predictor_params_ = { - 'optimize_after_n_clicks': 1 - } - - if zoom_in_params is not None: - zoom_in = ZoomIn(**zoom_in_params) - else: - zoom_in = None - - if lbfgs_params is not None: - lbfgs_params_.update(lbfgs_params) - - lbfgs_params_['maxiter'] = 2 * lbfgs_params_['maxfun'] - - if brs_opt_func_params is None: - brs_opt_func_params = dict() - - if brs_mode == 'NoBRS': - if predictor_params is not None: - predictor_params_.update(predictor_params) - predictor = BasePredictor(net, zoom_in=zoom_in, with_flip=with_flip, **predictor_params_) - elif brs_mode.startswith('f-BRS'): - predictor_params_.update({ - 'net_clicks_limit': 8, - }) - if predictor_params is not None: - predictor_params_.update(predictor_params) - - insertion_mode = { - 'f-BRS-A': 'after_c4', - 'f-BRS-B': 'after_aspp', - 'f-BRS-C': 'after_deeplab' - }[brs_mode] - - opt_functor = ScaleBiasOptimizer(prob_thresh=prob_thresh, - with_flip=with_flip, - optimizer_params=lbfgs_params_, - **brs_opt_func_params) - - if isinstance(net, HRNetModel): - FeaturePredictor = HRNetFeatureBRSPredictor - insertion_mode = {'after_c4': 'A', 'after_aspp': 'A', 'after_deeplab': 'C'}[insertion_mode] - else: - FeaturePredictor = FeatureBRSPredictor - - predictor = FeaturePredictor(net, - 
opt_functor=opt_functor, - with_flip=with_flip, - insertion_mode=insertion_mode, - zoom_in=zoom_in, - **predictor_params_) - - elif brs_mode == 'RGB-BRS' or brs_mode == 'DistMap-BRS': - use_dmaps = brs_mode == 'DistMap-BRS' - predictor_params_.update({ - 'net_clicks_limit': 5, - }) - if predictor_params is not None: - predictor_params_.update(predictor_params) - - opt_functor = InputOptimizer(prob_thresh=prob_thresh, - with_flip=with_flip, - optimizer_params=lbfgs_params_, - **brs_opt_func_params) - - predictor = InputBRSPredictor(net, - optimize_target='dmaps' if use_dmaps else 'rgb', - opt_functor=opt_functor, - with_flip=with_flip, - zoom_in=zoom_in, - **predictor_params_) - else: - raise NotImplementedError - return predictor diff --git a/contrib/EISeg/eiseg/inference/predictor/brs.py b/contrib/EISeg/eiseg/inference/predictor/brs.py deleted file mode 100644 index f87732b91b..0000000000 --- a/contrib/EISeg/eiseg/inference/predictor/brs.py +++ /dev/null @@ -1,313 +0,0 @@ -import paddle -import paddle.nn.functional as F -import numpy as np -from scipy.optimize import fmin_l_bfgs_b - -from .base import BasePredictor - - -class BRSBasePredictor(BasePredictor): - def __init__(self, model, opt_functor, optimize_after_n_clicks=1, **kwargs): - super().__init__(model, **kwargs) - self.optimize_after_n_clicks = optimize_after_n_clicks - self.opt_functor = opt_functor - - self.opt_data = None - self.input_data = None - - def set_input_image(self, image_nd): - super().set_input_image(image_nd) - self.opt_data = None - self.input_data = None - - def _get_clicks_maps_nd(self, clicks_lists, image_shape, radius=1): - pos_clicks_map = np.zeros([len(clicks_lists), 1] + image_shape, dtype=np.float32) - neg_clicks_map = np.zeros([len(clicks_lists), 1] + image_shape, dtype=np.float32) - - for list_indx, clicks_list in enumerate(clicks_lists): - for click in clicks_list: - y, x = click.coords - y, x = int(round(y)), int(round(x)) - y1, x1 = y - radius, x - radius - y2, x2 = y + radius + 1, x + radius + 1 - - if click.is_positive: - pos_clicks_map[list_indx, 0, y1:y2, x1:x2] = True - else: - neg_clicks_map[list_indx, 0, y1:y2, x1:x2] = True - - with paddle.no_grad(): - pos_clicks_map = paddle.to_tensor(pos_clicks_map) - neg_clicks_map = paddle.to_tensor(neg_clicks_map) - return pos_clicks_map, neg_clicks_map - - def get_states(self): - return {'transform_states': self._get_transform_states(), 'opt_data': self.opt_data} - - def set_states(self, states): - self._set_transform_states(states['transform_states']) - self.opt_data = states['opt_data'] - - -class FeatureBRSPredictor(BRSBasePredictor): - def __init__(self, model, opt_functor, insertion_mode='after_deeplab', **kwargs): - super().__init__(model, opt_functor=opt_functor, **kwargs) - self.insertion_mode = insertion_mode - self._c1_features = None - self.model = model - - if self.insertion_mode == 'after_deeplab': - self.num_channels = model.feature_extractor.ch - elif self.insertion_mode == 'after_c4': - self.num_channels = model.feature_extractor.aspp_in_channels - elif self.insertion_mode == 'after_aspp': - self.num_channels = model.feature_extractor.ch + 32 - else: - raise NotImplementedError - - def _get_prediction(self, image_nd, clicks_lists, is_image_changed): - points_nd = self.get_points_nd(clicks_lists) - pos_mask, neg_mask = self._get_clicks_maps_nd(clicks_lists, image_nd.shape[2:]) - - num_clicks = len(clicks_lists[0]) - bs = image_nd.shape[0] // 2 if self.with_flip else image_nd.shape[0] - - if self.opt_data is None or self.opt_data.shape[0] 
// (2 * self.num_channels) != bs: - self.opt_data = np.zeros((bs * 2 * self.num_channels), dtype=np.float64) - - if num_clicks <= self.net_clicks_limit or is_image_changed or self.input_data is None: - self.input_data = self._get_head_input(image_nd, points_nd) - - def get_prediction_logits(scale, bias): - scale = scale.reshape([bs, -1, 1, 1]) - bias = bias.reshape([bs, -1, 1, 1]) - if self.with_flip: - scale = scale.tile([2, 1, 1, 1]) - bias = bias.tile([2, 1, 1, 1]) - - scaled_backbone_features = self.input_data * scale - scaled_backbone_features = scaled_backbone_features + bias - if self.insertion_mode == 'after_c4': - x = self.net.feature_extractor.aspp(scaled_backbone_features) - x = F.interpolate(x, mode='bilinear', size=self._c1_features.shape[2:], align_corners=True) - x = paddle.concat((x, self._c1_features), axis=1) - scaled_backbone_features = self.net.feature_extractor.head(x) - - elif self.insertion_mode == 'after_aspp': - scaled_backbone_features = self.net.feature_extractor.head(scaled_backbone_features) - - pred_logits = self.net.head(scaled_backbone_features) - pred_logits = F.interpolate(pred_logits, size=image_nd.shape[2:], mode='bilinear', - align_corners=True) - return pred_logits - - self.opt_functor.init_click(get_prediction_logits, pos_mask, neg_mask) - if num_clicks > self.optimize_after_n_clicks: - opt_result = fmin_l_bfgs_b(func=self.opt_functor, x0=self.opt_data, - **self.opt_functor.optimizer_params) - self.opt_data = opt_result[0] - with paddle.no_grad(): - if self.opt_functor.best_prediction is not None: - opt_pred_logits = self.opt_functor.best_prediction - else: - opt_data_nd = paddle.to_tensor(self.opt_data) - scale, bias, _ = self.opt_functor.unpack_opt_params(opt_data_nd) - opt_pred_logits = get_prediction_logits(scale, bias) - - return opt_pred_logits - - def _get_head_input(self, image_nd, points): - with paddle.no_grad(): - image_nd, prev_mask = self.net.prepare_input(image_nd) - coord_features = self.net.get_coord_features(image_nd, prev_mask, points) - - if self.net.rgb_conv is not None: - x = self.net.rgb_conv(paddle.concat((image_nd, coord_features), axis=1)) - additional_features = None - elif hasattr(self.net, 'maps_transform'): - x = image_nd - additional_features = self.net.maps_transform(coord_features) - - if self.insertion_mode == 'after_c4' or self.insertion_mode == 'after_aspp': - c1, _, c3, c4 = self.net.feature_extractor.backbone(x, additional_features) - c1 = self.net.feature_extractor.skip_project(c1) - - if self.insertion_mode == 'after_aspp': - x = self.net.feature_extractor.aspp(c4) - x = F.interpolate(x, size=c1.shape[2:], mode='bilinear', align_corners=True) - x = paddle.concat((x, c1), axis=1) - backbone_features = x - else: - backbone_features = c4 - self._c1_features = c1 - else: - backbone_features = self.net.feature_extractor(x, additional_features)[0] - - return backbone_features - - -class HRNetFeatureBRSPredictor(BRSBasePredictor): - def __init__(self, model, opt_functor, insertion_mode='A', **kwargs): - super().__init__(model, opt_functor=opt_functor, **kwargs) - self.insertion_mode = insertion_mode - self._c1_features = None - # sself.model = model - - if self.insertion_mode == 'A': - self.num_channels = sum(k * model.feature_extractor.width for k in [1, 2, 4, 8]) - elif self.insertion_mode == 'C': - self.num_channels = 2 * model.feature_extractor.ocr_width - else: - raise NotImplementedError - - def _get_prediction(self, image_nd, clicks_lists, is_image_changed): - points_nd = self.get_points_nd(clicks_lists) - 
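# [Editor's illustrative sketch, not part of the original patch.]
# The f-BRS predictors removed in this hunk pack per-channel scale and bias
# corrections into a single flat float64 vector (`opt_data`) and refine it with
# SciPy's L-BFGS-B once enough clicks have been placed. A minimal standalone
# illustration of that optimize-a-flat-vector pattern (NumPy/SciPy only; the
# quadratic loss and the exact scale/bias unpacking below are simplified
# stand-ins, not the real click-consistency objective):
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def unpack_scale_bias(flat, num_channels):
    # First half of the vector is read as additive scale corrections,
    # second half as per-channel biases (simplified convention).
    return 1.0 + flat[:num_channels], flat[num_channels:]

def toy_loss(flat, feats, target, num_channels):
    scale, bias = unpack_scale_bias(flat, num_channels)
    pred = feats * scale[None, :] + bias[None, :]
    return float(((pred - target) ** 2).mean())

num_channels = 4
feats = np.random.rand(16, num_channels)
target = np.random.rand(16, num_channels)
x0 = np.zeros(2 * num_channels, dtype=np.float64)  # scale block, then bias block
x_opt, best_loss, info = fmin_l_bfgs_b(
    toy_loss, x0, args=(feats, target, num_channels),
    approx_grad=True, maxfun=20)
print(best_loss)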
pos_mask, neg_mask = self._get_clicks_maps_nd(clicks_lists, image_nd.shape[2:]) - num_clicks = len(clicks_lists[0]) - bs = image_nd.shape[0] // 2 if self.with_flip else image_nd.shape[0] - - if self.opt_data is None or self.opt_data.shape[0] // (2 * self.num_channels) != bs: - self.opt_data = np.zeros((bs * 2 * self.num_channels), dtype=np.float64) - - if num_clicks <= self.net_clicks_limit or is_image_changed or self.input_data is None: - self.input_data = self._get_head_input(image_nd, points_nd) - - def get_prediction_logits(scale, bias): - scale = scale.reshape([bs, -1, 1, 1]) - bias = bias.reshape([bs, -1, 1, 1]) - if self.with_flip: - scale = scale.tile([2, 1, 1, 1]) - bias = bias.tile([2, 1, 1, 1]) - - scaled_backbone_features = self.input_data * scale - scaled_backbone_features = scaled_backbone_features + bias - - if self.insertion_mode == 'A': - if self.net.feature_extractor.ocr_width > 0: - out_aux = self.net.feature_extractor.aux_head(scaled_backbone_features) - out_aux.stop_gradient = False - feats = self.net.feature_extractor.conv3x3_ocr(scaled_backbone_features) - feats.stop_gradient = False - context = self.net.feature_extractor.ocr_gather_head(feats, out_aux) - context.stop_gradient = False - feats = self.net.feature_extractor.ocr_distri_head(feats, context) - feats.stop_gradient = False - else: - feats = scaled_backbone_features - pred_logits = self.net.feature_extractor.cls_head(feats) - pred_logits.stop_gradient = False - elif self.insertion_mode == 'C': - pred_logits = self.net.feature_extractor.cls_head(scaled_backbone_features) - else: - raise NotImplementedError - - pred_logits = F.interpolate(pred_logits, size=image_nd.shape[2:], mode='bilinear', - align_corners=True) - return pred_logits - - self.opt_functor.init_click(get_prediction_logits, pos_mask, neg_mask) - if num_clicks > self.optimize_after_n_clicks: - opt_result = fmin_l_bfgs_b(func=self.opt_functor, x0=self.opt_data, - **self.opt_functor.optimizer_params) - self.opt_data = opt_result[0] - - with paddle.no_grad(): - if self.opt_functor.best_prediction is not None: - opt_pred_logits = self.opt_functor.best_prediction - else: - opt_data_nd = paddle.to_tensor(self.opt_data) - opt_vars, _ = self.opt_functor.unpack_opt_params(opt_data_nd) - opt_pred_logits = get_prediction_logits(*opt_vars) - - return opt_pred_logits - - def _get_head_input(self, image_nd, points): - with paddle.no_grad(): - image_nd, prev_mask = self.net.prepare_input(image_nd) - coord_features = self.net.get_coord_features(image_nd, prev_mask, points) - - if self.net.rgb_conv is not None: - x = self.net.rgb_conv(paddle.concat((image_nd, coord_features), axis=1)) - additional_features = None - else: - x = image_nd - additional_features = self.net.maps_transform(coord_features) - - feats = self.net.feature_extractor.compute_hrnet_feats(x, additional_features) - - if self.insertion_mode == 'A': - backbone_features = feats - elif self.insertion_mode == 'C': - out_aux = self.net.feature_extractor.aux_head(feats) - feats = self.net.feature_extractor.conv3x3_ocr(feats) - - context = self.net.feature_extractor.ocr_gather_head(feats, out_aux) - backbone_features = self.net.feature_extractor.ocr_distri_head(feats, context) - else: - raise NotImplementedError - - return backbone_features - - -class InputBRSPredictor(BRSBasePredictor): - def __init__(self, model, opt_functor, optimize_target='rgb', **kwargs): - super().__init__(model, opt_functor=opt_functor, **kwargs) - self.optimize_target = optimize_target - - def _get_prediction(self, image_nd, 
clicks_lists, is_image_changed): - points_nd = self.get_points_nd(clicks_lists) - pos_mask, neg_mask = self._get_clicks_maps_nd(clicks_lists, image_nd.shape[2:]) - num_clicks = len(clicks_lists[0]) - - if self.opt_data is None or is_image_changed: - if self.optimize_target == 'dmaps': - opt_channels = self.net.coord_feature_ch - 1 if self.net.with_prev_mask else self.net.coord_feature_ch - else: - opt_channels = 3 - - bs = image_nd.shape[0] // 2 if self.with_flip else image_nd.shape[0] - self.opt_data = paddle.zeros((bs, opt_channels, image_nd.shape[2], image_nd.shape[3]), dtype='float64') - - def get_prediction_logits(opt_bias): - input_image, prev_mask = self.net.prepare_input(image_nd) - dmaps = self.net.get_coord_features(input_image, prev_mask, points_nd) - - if self.optimize_target == 'rgb': - input_image = input_image + opt_bias - - elif self.optimize_target == 'dmaps': - if self.net.with_prev_mask: - dmaps[:, 1:, :, :] = dmaps[:, 1:, :, :] + opt_bias - else: - dmaps = dmaps + opt_bias - - if self.net.rgb_conv is not None: - x = self.net.rgb_conv(paddle.concat((input_image, dmaps), axis=1)) - if self.optimize_target == 'all': - x = x + opt_bias - coord_features = None - elif hasattr(self.net, 'maps_transform'): - x = input_image - coord_features = self.net.maps_transform(dmaps) - - pred_logits = self.net.backbone_forward(x, coord_features=coord_features)['instances'] - pred_logits = F.interpolate(pred_logits, size=image_nd.shape[2:], mode='bilinear', align_corners=True) - - return pred_logits - - self.opt_functor.init_click(get_prediction_logits, pos_mask, neg_mask, shape=self.opt_data.shape) - if num_clicks > self.optimize_after_n_clicks: - opt_result = fmin_l_bfgs_b(func=self.opt_functor, x0=self.opt_data.cpu().numpy().ravel(), - **self.opt_functor.optimizer_params) - - self.opt_data = paddle.to_tensor(opt_result[0]) - self.opt_data = paddle.reshape(self.opt_data, shape=self.opt_data.shape) - - with paddle.no_grad(): - if self.opt_functor.best_prediction is not None: - opt_pred_logits = self.opt_functor.best_prediction - else: - opt_vars, _ = self.opt_functor.unpack_opt_params(self.opt_data) - opt_pred_logits = get_prediction_logits(*opt_vars) - - return opt_pred_logits diff --git a/contrib/EISeg/eiseg/inference/predictor/brs_functors.py b/contrib/EISeg/eiseg/inference/predictor/brs_functors.py deleted file mode 100644 index c8b68df34f..0000000000 --- a/contrib/EISeg/eiseg/inference/predictor/brs_functors.py +++ /dev/null @@ -1,118 +0,0 @@ -import paddle -import paddle.nn.functional as F -import numpy as np - -from model.metrics import _compute_iou -from .brs_losses import BRSMaskLoss - - -class BaseOptimizer: - def __init__(self, optimizer_params, - prob_thresh=0.49, - reg_weight=1e-3, - min_iou_diff=0.01, - brs_loss=BRSMaskLoss(), - with_flip=False, - flip_average=False, - **kwargs): - self.brs_loss = brs_loss - self.optimizer_params = optimizer_params - self.prob_thresh = prob_thresh - self.reg_weight = reg_weight - self.min_iou_diff = min_iou_diff - self.with_flip = with_flip - self.flip_average = flip_average - - self.best_prediction = None - self._get_prediction_logits = None - self._opt_shape = None - self._best_loss = None - self._click_masks = None - self._last_mask = None - - def init_click(self, get_prediction_logits, pos_mask, neg_mask, shape=None): - self.best_prediction = None - self._get_prediction_logits = get_prediction_logits - self._click_masks = (pos_mask, neg_mask) - self._opt_shape = shape - self._last_mask = None - - def __call__(self, x): - opt_params = 
paddle.to_tensor(x).astype('float64') - opt_params.stop_gradient = False -# with enable_grad(): -# scale, bias, reg_loss = self.unpack_opt_params(opt_params) -# result_before_sigmoid = self._get_prediction_logits(scale, bias) -# result = F.sigmoid(result_before_sigmoid) -# pos_mask, neg_mask = self._click_masks -# if self.with_flip and self.flip_average: -# result, result_flipped = paddle.chunk(result, 2, axis=0) -# result = 0.5 * (result + paddle.flip(result_flipped, axis=[3])) -# pos_mask, neg_mask = pos_mask[:result.shape[0]], neg_mask[:result.shape[0]] - -# loss, f_max_pos, f_max_neg = self.brs_loss(result, pos_mask, neg_mask) -# loss = loss + reg_loss - - opt_vars, reg_loss = self.unpack_opt_params(opt_params) - result_before_sigmoid = self._get_prediction_logits(*opt_vars) - result = F.sigmoid(result_before_sigmoid) - - pos_mask, neg_mask = self._click_masks - if self.with_flip and self.flip_average: - result, result_flipped = paddle.chunk(result, 2, axis=0) - result = 0.5 * (result + paddle.flip(result_flipped, axis=[3])) - pos_mask, neg_mask = pos_mask[:result.shape[0]], neg_mask[:result.shape[0]] - - loss, f_max_pos, f_max_neg = self.brs_loss(result, pos_mask, neg_mask) - loss = loss + reg_loss - - f_val = loss.detach().numpy() - if self.best_prediction is None or f_val < self._best_loss: - self.best_prediction = result_before_sigmoid.detach() - self._best_loss = f_val - if f_max_pos < (1 - self.prob_thresh) and f_max_neg < self.prob_thresh: - return [f_val, np.zeros_like(x)] - - current_mask = result > self.prob_thresh - if self._last_mask is not None and self.min_iou_diff > 0: - diff_iou = _compute_iou(current_mask, self._last_mask) - if len(diff_iou) > 0 and diff_iou.mean() > 1 - self.min_iou_diff: - return [f_val, np.zeros_like(x)] - self._last_mask = current_mask - - loss.backward() - f_grad = opt_params.gradient().ravel().astype(np.float64) - - return [f_val, f_grad] - - def unpack_opt_params(self, opt_params): - raise NotImplementedError - - -class InputOptimizer(BaseOptimizer): - def unpack_opt_params(self, opt_params): - opt_params = opt_params.reshape(self._opt_shape) - if self.with_flip: - opt_params_flipped = paddle.flip(opt_params, axis=[3]) - opt_params = paddle.concat([opt_params, opt_params_flipped], axis=0) - reg_loss = self.reg_weight * paddle.sum(opt_params ** 2) - - return (opt_params,), reg_loss - - -class ScaleBiasOptimizer(BaseOptimizer): - def __init__(self, *args, scale_act=None, reg_bias_weight=10.0, **kwargs): - super().__init__(*args, **kwargs) - self.scale_act = scale_act - self.reg_bias_weight = reg_bias_weight - - def unpack_opt_params(self, opt_params): - scale, bias = paddle.chunk(opt_params, 2, axis=0) - reg_loss = self.reg_weight * (paddle.sum(scale ** 2) + self.reg_bias_weight * paddle.sum(bias ** 2)) - - if self.scale_act == 'tanh': - scale = paddle.tanh(scale) - elif self.scale_act == 'sin': - scale = paddle.sin(scale) - - return (1 + scale, bias), reg_loss diff --git a/contrib/EISeg/eiseg/inference/predictor/brs_losses.py b/contrib/EISeg/eiseg/inference/predictor/brs_losses.py deleted file mode 100644 index 04c6503789..0000000000 --- a/contrib/EISeg/eiseg/inference/predictor/brs_losses.py +++ /dev/null @@ -1,59 +0,0 @@ -import paddle -import paddle.nn as nn - -from model.losses import SigmoidBinaryCrossEntropyLoss - - -class BRSMaskLoss(nn.Layer): - def __init__(self, eps=1e-5): - super().__init__() - self._eps = eps - - def forward(self, result, pos_mask, neg_mask): - pos_diff = (1 - result) * pos_mask - pos_target = paddle.sum(pos_diff ** 
2) - pos_target = pos_target / (paddle.sum(pos_mask) + self._eps) - - neg_diff = result * neg_mask - neg_target = paddle.sum(neg_diff ** 2) - neg_target = neg_target / (paddle.sum(neg_mask) + self._eps) - - loss = pos_target + neg_target - - with paddle.no_grad(): - f_max_pos = paddle.max(paddle.abs(pos_diff)) - f_max_neg = paddle.max(paddle.abs(neg_diff)) - - return loss, f_max_pos, f_max_neg - - -class OracleMaskLoss(nn.Layer): - def __init__(self): - super().__init__() - self.gt_mask = None - self.loss = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True) - self.predictor = None - self.history = [] - - def set_gt_mask(self, gt_mask): - self.gt_mask = gt_mask - self.history = [] - - def forward(self, result, pos_mask, neg_mask): - gt_mask = self.gt_mask - if self.predictor.object_roi is not None: - r1, r2, c1, c2 = self.predictor.object_roi[:4] - gt_mask = gt_mask[:, :, r1:r2 + 1, c1:c2 + 1] - gt_mask = paddle.nn.functional.interpolate(gt_mask, result.size()[2:], mode='bilinear', align_corners=True) - - if result.shape[0] == 2: - gt_mask_flipped = paddle.flip(gt_mask, axis=[3]) - gt_mask = paddle.concat([gt_mask, gt_mask_flipped], axis=0) - - loss = self.loss(result, gt_mask) - self.history.append(loss.detach().cpu().numpy()[0]) - - if len(self.history) > 5 and abs(self.history[-5] - self.history[-1]) < 1e-5: - return 0, 0, 0 - - return loss, 1.0, 1.0 diff --git a/contrib/EISeg/eiseg/inference/utils.py b/contrib/EISeg/eiseg/inference/utils.py deleted file mode 100644 index 788ddb9e3f..0000000000 --- a/contrib/EISeg/eiseg/inference/utils.py +++ /dev/null @@ -1,144 +0,0 @@ -from datetime import timedelta -from pathlib import Path -import paddle - -import numpy as np - -from data.datasets import GrabCutDataset, BerkeleyDataset, DavisDataset, SBDEvaluationDataset, PascalVocDataset, HumanDataset -from util.serialization import load_model - - -def get_time_metrics(all_ious, elapsed_time): - n_images = len(all_ious) - n_clicks = sum(map(len, all_ious)) - - mean_spc = elapsed_time / n_clicks - mean_spi = elapsed_time / n_images - - return mean_spc, mean_spi - - -def load_is_model(checkpoint, **kwargs): - if isinstance(checkpoint, (str, Path)): - state_dict = paddle.load(checkpoint) - else: - state_dict = checkpoint - - if isinstance(state_dict, list): - model = load_single_is_model(state_dict[0], **kwargs) - models = [load_single_is_model(x, **kwargs) for x in state_dict] - - return model, models - else: - return load_single_is_model(state_dict, **kwargs) - - -def load_single_is_model(state_dict, **kwargs): - model = load_model(state_dict['config'], **kwargs) - model.load_state_dict(state_dict['state_dict'], strict=False) - - for param in model.parameters(): - param.requires_grad = False - model.eval() - - return model - - -def get_dataset(dataset_name, cfg): - if dataset_name == 'GrabCut': - dataset = GrabCutDataset(cfg.GRABCUT_PATH) - elif dataset_name == 'Berkeley': - dataset = BerkeleyDataset(cfg.BERKELEY_PATH) - elif dataset_name == 'DAVIS': - dataset = DavisDataset(cfg.DAVIS_PATH) - elif dataset_name == 'SBD': - dataset = SBDEvaluationDataset(cfg.SBD_PATH) - elif dataset_name == 'SBD_Train': - dataset = SBDEvaluationDataset(cfg.SBD_PATH, split='train') - elif dataset_name == 'PascalVOC': - dataset = PascalVocDataset(cfg.PASCALVOC_PATH, split='val') - elif dataset_name == 'COCO_MVal': - dataset = DavisDataset(cfg.COCO_MVAL_PATH) - elif dataset_name == 'Human': - dataset = HumanDataset(cfg.HUMAN_PATH, split='test') - else: - dataset = None - - return dataset - - -def get_iou(gt_mask, pred_mask, 
ignore_label=-1): - ignore_gt_mask_inv = gt_mask != ignore_label - obj_gt_mask = gt_mask == 1 - - intersection = np.logical_and(np.logical_and(pred_mask, obj_gt_mask), ignore_gt_mask_inv).sum() - union = np.logical_and(np.logical_or(pred_mask, obj_gt_mask), ignore_gt_mask_inv).sum() - - return intersection / union - - -def compute_noc_metric(all_ious, iou_thrs, max_clicks=20): - def _get_noc(iou_arr, iou_thr): - vals = iou_arr >= iou_thr - return np.argmax(vals) + 1 if np.any(vals) else max_clicks - - noc_list = [] - over_max_list = [] - for iou_thr in iou_thrs: - scores_arr = np.array([_get_noc(iou_arr, iou_thr) - for iou_arr in all_ious], dtype=np.int) - - score = scores_arr.mean() - over_max = (scores_arr == max_clicks).sum() - - noc_list.append(score) - over_max_list.append(over_max) - - return noc_list, over_max_list - - -def find_checkpoint(weights_folder, checkpoint_name): - weights_folder = Path(weights_folder) - if ':' in checkpoint_name: - model_name, checkpoint_name = checkpoint_name.split(':') - models_candidates = [x for x in weights_folder.glob(f'{model_name}*') if x.is_dir()] - assert len(models_candidates) == 1 - model_folder = models_candidates[0] - else: - model_folder = weights_folder - - if checkpoint_name.endswith('.pth'): - if Path(checkpoint_name).exists(): - checkpoint_path = checkpoint_name - else: - checkpoint_path = weights_folder / checkpoint_name - else: - model_checkpoints = list(model_folder.rglob(f'{checkpoint_name}*.pdparams')) - assert len(model_checkpoints) == 1 - checkpoint_path = model_checkpoints[0] - - return str(checkpoint_path) - - -def get_results_table(noc_list, over_max_list, brs_type, dataset_name, mean_spc, elapsed_time, - n_clicks=20, model_name=None): - table_header = (f'|{"BRS Type":^13}|{"Dataset":^11}|' - f'{"NoC@80%":^9}|{"NoC@85%":^9}|{"NoC@90%":^9}|' - f'{">="+str(n_clicks)+"@85%":^9}|{">="+str(n_clicks)+"@90%":^9}|' - f'{"SPC,s":^7}|{"Time":^9}|') - row_width = len(table_header) - - header = f'Eval results for model: {model_name}\n' if model_name is not None else '' - header += '-' * row_width + '\n' - header += table_header + '\n' + '-' * row_width - - eval_time = str(timedelta(seconds=int(elapsed_time))) - table_row = f'|{brs_type:^13}|{dataset_name:^11}|' - table_row += f'{noc_list[0]:^9.2f}|' - table_row += f'{noc_list[1]:^9.2f}|' if len(noc_list) > 1 else f'{"?":^9}|' - table_row += f'{noc_list[2]:^9.2f}|' if len(noc_list) > 2 else f'{"?":^9}|' - table_row += f'{over_max_list[1]:^9}|' if len(noc_list) > 1 else f'{"?":^9}|' - table_row += f'{over_max_list[2]:^9}|' if len(noc_list) > 2 else f'{"?":^9}|' - table_row += f'{mean_spc:^7.3f}|{eval_time:^9}|' - - return header, table_row \ No newline at end of file diff --git a/contrib/EISeg/eiseg/model/initializer.py b/contrib/EISeg/eiseg/model/initializer.py deleted file mode 100644 index 9f3dea865b..0000000000 --- a/contrib/EISeg/eiseg/model/initializer.py +++ /dev/null @@ -1,76 +0,0 @@ -import paddle -import paddle.nn as nn -import numpy as np -from paddle.fluid.initializer import Initializer - -class SInitializer(Initializer): - def __init__(self, local_init=True, gamma=None): - self.local_init = local_init - self.gamma = gamma - - def __call__(self, m): - - - if isinstance(m, (nn.BatchNorm1D, nn.BatchNorm2D, nn.BatchNorm3D, - nn.InstanceNorm1D, nn.InstanceNorm2D, nn.InstanceNorm3D, - nn.GroupNorm, nn.SyncBatchNorm)) or 'BatchNorm' in m.__class__.__name__: - if m.weight is not None: - self._init_gamma(m.weight) - if m.bias is not None: - self._init_beta(m.bias) - else: - if 
getattr(m, 'weight', None) is not None: - self._init_weight(m.weight) - if getattr(m, 'bias', None) is not None: - self._init_bias(m.bias) - - def _init_weight(self, param): - initializer = nn.initializer.Uniform(-0.07, 0.07) - initializer(param, param.block) - - def _init_bias(self, param): - initializer = nn.initializer.Constant(0) - initializer(param, param.block) - - def _init_gamma(self, param): - if self.gamma is None: - initializer = nn.initializer.Constant(0) - initializer(param, param.block) - else: - initializer = nn.initializer.Normal(1, self.gamma) - initializer(param, param.block) - - def _init_beta(self, param): - initializer = nn.initializer.Constant(0) - initializer(param, param.block) - - -class XavierGluon(SInitializer): - def __init__(self, rnd_type='uniform', factor_type='avg', magnitude=3, **kwargs): - super().__init__(**kwargs) - - self.rnd_type = rnd_type - self.factor_type = factor_type - self.magnitude = float(magnitude) - - def _init_weight(self, arr): - fan_in, fan_out =self._compute_fans(arr) - - if self.factor_type == 'avg': - factor = (fan_in + fan_out) / 2.0 - elif self.factor_type == 'in': - factor = fan_in - elif self.factor_type == 'out': - factor = fan_out - else: - raise ValueError('Incorrect factor type') - scale = np.sqrt(self.magnitude / factor) - - if self.rnd_type == 'uniform': - initializer = nn.initializer.Uniform(-scale, scale) - initializer(arr, arr.block) - elif self.rnd_type == 'gaussian': - initializer = nn.initializer.Normal(0, scale) - initializer(arr, arr.block) - else: - raise ValueError('Unknown random type') diff --git a/contrib/EISeg/eiseg/model/is_deeplab_model.py b/contrib/EISeg/eiseg/model/is_deeplab_model.py deleted file mode 100644 index 0fa0cf940f..0000000000 --- a/contrib/EISeg/eiseg/model/is_deeplab_model.py +++ /dev/null @@ -1,24 +0,0 @@ -import paddle.nn as nn - -from util.serialization import serialize -from .is_model import ISModel -from .modeling.deeplab_v3 import DeepLabV3Plus -from .modeling.basic_blocks import SepConvHead -from model.modifiers import LRMult - -class DeeplabModel(ISModel): - @serialize - def __init__(self, backbone='resnet50', deeplab_ch=256, aspp_dropout=0.5, - backbone_norm_layer=None, backbone_lr_mult=0.1, norm_layer=nn.BatchNorm2D, **kwargs): - super().__init__(norm_layer=norm_layer, **kwargs) - - self.feature_extractor = DeepLabV3Plus(backbone=backbone, ch=deeplab_ch, project_dropout=aspp_dropout, - norm_layer=norm_layer, backbone_norm_layer=backbone_norm_layer) - self.feature_extractor.backbone.apply(LRMult(backbone_lr_mult)) - self.head = SepConvHead(1, in_channels=deeplab_ch, mid_channels=deeplab_ch // 2, - num_layers=2, norm_layer=norm_layer) - - def backbone_forward(self, image, coord_features=None): - backbone_features = self.feature_extractor(image, coord_features) - - return {'instances': self.head(backbone_features[0])} \ No newline at end of file diff --git a/contrib/EISeg/eiseg/model/losses.py b/contrib/EISeg/eiseg/model/losses.py deleted file mode 100644 index 0b73b1edfd..0000000000 --- a/contrib/EISeg/eiseg/model/losses.py +++ /dev/null @@ -1,162 +0,0 @@ -import numpy as np -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from util import misc - - -class NormalizedFocalLossSigmoid(nn.Layer):#这个有问题 - def __init__(self, axis=-1, alpha=0.25, gamma=2, max_mult=-1, eps=1e-12, - from_sigmoid=False, detach_delimeter=True, - batch_axis=0, weight=None, size_average=True, - ignore_label=-1): - super(NormalizedFocalLossSigmoid, self).__init__() - self._axis = axis - 
self._alpha = alpha - self._gamma = gamma - self._ignore_label = ignore_label - self._weight = weight if weight is not None else 1.0 - self._batch_axis = batch_axis - - self._from_logits = from_sigmoid - self._eps = eps - self._size_average = size_average - self._detach_delimeter = detach_delimeter - self._max_mult = max_mult - self._k_sum = 0 - self._m_max = 0 - - def forward(self, pred, label): - one_hot = label > 0.5 - sample_weight = label != self._ignore_label - - - sample_weight = sample_weight.astype('float32') - - if not self._from_logits: - pred = F.sigmoid(pred) - alpha = paddle.where(one_hot, self._alpha * sample_weight, (1 - self._alpha) * sample_weight) - pt = paddle.where(sample_weight.astype('bool'), 1.0 - paddle.abs(label - pred), paddle.ones_like(pred)) - beta = (1 - pt) ** self._gamma - sw_sum = paddle.sum(sample_weight, axis=(-2, -1), keepdim=True) - beta_sum = paddle.sum(beta, axis=(-2, -1), keepdim=True) - mult = sw_sum / (beta_sum + self._eps) - - if self._detach_delimeter: - mult = mult.detach() - beta = beta * mult - with paddle.no_grad(): - ignore_area = paddle.sum((label == self._ignore_label).astype('float32'), - axis=tuple(range(1, len(label.shape)))).numpy() - sample_mult = paddle.mean(mult, axis=tuple(range(1, len(mult.shape)))).numpy() - if np.any(ignore_area == 0): - self._k_sum = 0.9 * self._k_sum + 0.1 * sample_mult[ignore_area == 0].mean() - beta_pmax = paddle.max(paddle.flatten(beta, 1), axis=1) - beta_pmax = float(paddle.mean(beta_pmax)) - self._m_max = 0.8 * self._m_max + 0.2 * beta_pmax - - loss_mask = pt + self._eps < 1 - loss_mask = loss_mask.astype('float32') - pt_mask = (pt + self._eps) * loss_mask + (1 - loss_mask)* paddle.ones(pt.shape) - loss = -alpha * beta * paddle.log(pt_mask) - loss = self._weight * (loss * sample_weight) - - if self._size_average: - bsum = paddle.sum(sample_weight, - axis=misc.get_dims_with_exclusion(len(sample_weight.shape), self._batch_axis)) - loss = paddle.sum(loss, axis=misc.get_dims_with_exclusion(len(loss.shape), self._batch_axis)) / ( - bsum + self._eps) - else: - loss = paddle.sum(loss, axis=paddle.get_dims_with_exclusion(len(loss.shape), self._batch_axis)) - - return loss - - def log_states(self, sw, name, global_step): - sw.add_scalar(tag=name + '_k', value=self._k_sum, global_step=global_step) - sw.add_scalar(tag=name + '_m', value=self._m_max, global_step=global_step) - - -class FocalLoss(nn.Layer): - def __init__(self, axis=-1, alpha=0.25, gamma=2, - from_logits=False, batch_axis=0, - weight=None, num_class=None, - eps=1e-9, size_average=True, scale=1.0, - ignore_label=-1): - super(FocalLoss, self).__init__() - self._axis = axis - self._alpha = alpha - self._gamma = gamma - self._ignore_label = ignore_label - self._weight = weight if weight is not None else 1.0 - self._batch_axis = batch_axis - - self._scale = scale - self._num_class = num_class - self._from_logits = from_logits - self._eps = eps - self._size_average = size_average - - def forward(self, pred, label, sample_weight=None): - one_hot = label > 0.5 - sample_weight = label != self._ignore_label - - if not self._from_logits: - pred = F.sigmoid(pred) - alpha = paddle.where(one_hot, self._alpha * sample_weight, (1 - self._alpha) * sample_weight) - pt = paddle.where(one_hot, 1.0 - paddle.abs(label - pred), paddle.ones_like(pred)) - - beta = (1 - pt) ** self._gamma - - loss = -alpha * beta * paddle.log(paddle.min(pt + self._eps, paddle.ones(1, dtype='float32'))) - loss = self._weight * (loss * sample_weight) - - if self._size_average: - tsum = 
paddle.sum(label == 1, axis=misc.get_dims_with_exclusion(len(label.shape), self._batch_axis)) - loss = paddle.sum(loss, axis=misc.get_dims_with_exclusion(len(loss.shape), self._batch_axis)) / ( - tsum + self._eps) - else: - loss = paddle.sum(loss, axis=misc.get_dims_with_exclusion(len(loss.shape), self._batch_axis)) - return self._scale * loss - - -class SoftIoU(nn.Layer): - def __init__(self, from_sigmoid=False, ignore_label=-1): - super().__init__() - self._from_sigmoid = from_sigmoid - self._ignore_label = ignore_label - - def forward(self, pred, label): - label = label.reshape(pred.shape) - sample_weight = label != self._ignore_label - - if not self._from_sigmoid: - pred = F.sigmoid(pred) - - loss = 1.0 - paddle.sum(pred * label * sample_weight, axis=(1, 2, 3)) \ - / (paddle.sum(paddle.max(pred, label) * sample_weight, axis=(1, 2, 3)) + 1e-8) - - return loss - - -class SigmoidBinaryCrossEntropyLoss(nn.Layer): - def __init__(self, from_sigmoid=False, weight=None, batch_axis=0, ignore_label=-1): - super(SigmoidBinaryCrossEntropyLoss, self).__init__() - self._from_sigmoid = from_sigmoid - self._ignore_label = ignore_label - self._weight = weight if weight is not None else 1.0 - self._batch_axis = batch_axis - - def forward(self, pred, label): - - label = label.reshape(pred.shape) - sample_weight = label != self._ignore_label - label = paddle.where(sample_weight, label, paddle.zeros_like(label)) - - if not self._from_sigmoid: - loss = F.relu(pred) - pred * label + F.softplus(-paddle.abs(pred)) - else: - eps = 1e-12 - loss = -(paddle.log(pred + eps) * label + paddle.log(1. - pred + eps) * (1. - label)) - loss = self._weight * (loss * sample_weight) - return paddle.mean(loss, axis=misc.get_dims_with_exclusion(len(loss.shape), self._batch_axis)) diff --git a/contrib/EISeg/eiseg/model/metrics.py b/contrib/EISeg/eiseg/model/metrics.py deleted file mode 100644 index fb1bf8d862..0000000000 --- a/contrib/EISeg/eiseg/model/metrics.py +++ /dev/null @@ -1,108 +0,0 @@ -import paddle -import paddle.nn.functional as F -import numpy as np - -from util import misc - - -class TrainMetric(object): - def __init__(self, pred_outputs, gt_outputs): - self.pred_outputs = pred_outputs - self.gt_outputs = gt_outputs - - def update(self, *args, **kwargs): - raise NotImplementedError - - def get_epoch_value(self): - raise NotImplementedError - - def reset_epoch_stats(self): - raise NotImplementedError - - def log_states(self, sw, tag_prefix, global_step): - pass - - @property - def name(self): - return type(self).__name__ - - -class AdaptiveIoU(TrainMetric): - def __init__(self, init_thresh=0.4, thresh_step=0.025, thresh_beta=0.99, iou_beta=0.9, - ignore_label=-1, from_logits=True, - pred_output='instances', gt_output='instances'): - super().__init__(pred_outputs=(pred_output,), gt_outputs=(gt_output,)) - self._ignore_label = ignore_label - self._from_logits = from_logits - self._iou_thresh = init_thresh - self._thresh_step = thresh_step - self._thresh_beta = thresh_beta - self._iou_beta = iou_beta - self._ema_iou = 0.0 - self._epoch_iou_sum = 0.0 - self._epoch_batch_count = 0 - - def update(self, pred, gt): - gt_mask = gt > 0.5 - if self._from_logits: - pred = F.sigmoid(pred) - - gt_mask_area = paddle.sum(gt_mask.astype('float32'), axis=(1, 2)).detach().numpy() - if np.all(gt_mask_area == 0): - return - - ignore_mask = gt == self._ignore_label - max_iou = _compute_iou(pred > self._iou_thresh, gt_mask, ignore_mask).mean() - best_thresh = self._iou_thresh - for t in [best_thresh - self._thresh_step, best_thresh + 
self._thresh_step]: - temp_iou = _compute_iou(pred > t, gt_mask, ignore_mask).mean() - if temp_iou > max_iou: - max_iou = temp_iou - best_thresh = t - - self._iou_thresh = self._thresh_beta * self._iou_thresh + (1 - self._thresh_beta) * best_thresh - self._ema_iou = self._iou_beta * self._ema_iou + (1 - self._iou_beta) * max_iou - self._epoch_iou_sum += max_iou - self._epoch_batch_count += 1 - - def get_epoch_value(self): - if self._epoch_batch_count > 0: - return self._epoch_iou_sum / self._epoch_batch_count - else: - return 0.0 - - def reset_epoch_stats(self): - self._epoch_iou_sum = 0.0 - self._epoch_batch_count = 0 - - def log_states(self, sw, tag_prefix, global_step): - sw.add_scalar(tag_prefix + '_ema_iou', self._ema_iou, global_step) - sw.add_scalar(tag_prefix + '_iou_thresh', self._iou_thresh, global_step) - - @property - def iou_thresh(self): - return self._iou_thresh - - -def _compute_iou(pred_mask, gt_mask, ignore_mask=None, keep_ignore=False): - if ignore_mask is not None: - - pred_mask = paddle.where(ignore_mask, paddle.zeros_like(pred_mask.astype('float32')), pred_mask.astype('float32')) - - reduction_dims = misc.get_dims_with_exclusion(len(gt_mask.shape), 0) - pred_mask = pred_mask.astype('bool') - m = pred_mask.numpy() | gt_mask.numpy() - n = pred_mask.numpy() & gt_mask.numpy() - union = np.mean(m.astype(np.float), axis=tuple(reduction_dims)) - intersection = np.mean(n.astype(np.float), axis=tuple(reduction_dims)) - - nonzero = union > 0 - - iou = intersection[nonzero] / union[nonzero] - if not keep_ignore: - return iou - else: - result = np.full_like(intersection, -1) - result[nonzero] = iou - return result - diff --git a/contrib/EISeg/eiseg/model/modeling/basic_blocks.py b/contrib/EISeg/eiseg/model/modeling/basic_blocks.py deleted file mode 100644 index bb6cf67e51..0000000000 --- a/contrib/EISeg/eiseg/model/modeling/basic_blocks.py +++ /dev/null @@ -1,67 +0,0 @@ -import paddle.nn as nn - - -class ConvHead(nn.Layer): - def __init__(self, out_channels, in_channels=32, num_layers=1, - kernel_size=3, padding=1, - norm_layer=nn.BatchNorm2D): - super(ConvHead, self).__init__() - convhead = [] - - for i in range(num_layers): - convhead.extend([ - nn.Conv2D(in_channels, in_channels, kernel_size, padding=padding), - nn.ReLU(), - norm_layer(in_channels) - ]) - convhead.append(nn.Conv2D(in_channels, out_channels, 1, padding=0)) - - self.convhead = nn.Sequential(*convhead) - - def forward(self, *inputs): - return self.convhead(inputs[0]) - - -class SepConvHead(nn.Layer): - def __init__(self, num_outputs, in_channels, mid_channels, num_layers=1, - kernel_size=3, padding=1, dropout_ratio=0.0, dropout_indx=0, - norm_layer=nn.BatchNorm2D): - super(SepConvHead, self).__init__() - - sepconvhead = [] - - for i in range(num_layers): - sepconvhead.append( - SeparableConv2d(in_channels=in_channels if i == 0 else mid_channels, - out_channels=mid_channels, - dw_kernel=kernel_size, dw_padding=padding, - norm_layer=norm_layer) - ) - if dropout_ratio > 0 and dropout_indx == i: - sepconvhead.append(nn.Dropout(dropout_ratio)) - - sepconvhead.append( - nn.Conv2D(in_channels=mid_channels, out_channels=num_outputs, kernel_size=1, padding=0) - ) - - self.layers = nn.Sequential(*sepconvhead) - - def forward(self, *inputs): - x = inputs[0] - - return self.layers(x) - - -class SeparableConv2d(nn.Layer): - def __init__(self, in_channels, out_channels, dw_kernel, dw_padding, dw_stride=1, use_bias=False, norm_layer=None): - super(SeparableConv2d, self).__init__() - self.body = nn.Sequential( - 
nn.Conv2D(in_channels, in_channels, kernel_size=dw_kernel, stride=dw_stride, - padding=dw_padding, bias_attr=use_bias, groups=in_channels), - nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=use_bias), - norm_layer(out_channels), - nn.ReLU() - ) - - def forward(self, x): - return self.body(x) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/model/modeling/deeplab_v3.py b/contrib/EISeg/eiseg/model/modeling/deeplab_v3.py deleted file mode 100644 index 4eb996378e..0000000000 --- a/contrib/EISeg/eiseg/model/modeling/deeplab_v3.py +++ /dev/null @@ -1,176 +0,0 @@ -from contextlib import ExitStack - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F - -from .basic_blocks import SeparableConv2d -from paddleseg.models.backbones import * -from .resnet import ResNetBackbone - -class DeepLabV3Plus(nn.Layer): - def __init__(self, backbone='resnet50', norm_layer=nn.BatchNorm2D, - backbone_norm_layer=None, - ch=256, - project_dropout=0.5, - inference_mode=False, - **kwargs): - super(DeepLabV3Plus, self).__init__() - if backbone_norm_layer is None: - backbone_norm_layer = norm_layer - - self.backbone_name = backbone - self.norm_layer = norm_layer - self.backbone_norm_layer = backbone_norm_layer - self.inference_mode = False - self.ch = ch - self.aspp_in_channels = 2048 - self.skip_project_in_channels = 256 # layer 1 out_channels - - self._kwargs = kwargs - if backbone == 'resnet34': - self.aspp_in_channels = 512 - self.skip_project_in_channels = 64 - - self.backbone = ResNetBackbone(backbone=self.backbone_name, pretrained_base=False, - norm_layer=self.backbone_norm_layer, **kwargs) - - - self.head = _DeepLabHead(in_channels=ch + 32, mid_channels=ch, out_channels=ch, - norm_layer=self.norm_layer) - self.skip_project = _SkipProject(self.skip_project_in_channels, 32, norm_layer=self.norm_layer) - self.aspp = _ASPP(in_channels=self.aspp_in_channels, - atrous_rates=[12, 24, 36], - out_channels=ch, - project_dropout=project_dropout, - norm_layer=self.norm_layer) - - if inference_mode: - self.set_prediction_mode() - - def load_pretrained_weights(self): - pretrained = ResNetBackbone(backbone=self.backbone_name, pretrained_base=True, - norm_layer=self.backbone_norm_layer, **self._kwargs) - - backbone_state_dict = self.backbone.state_dict() - pretrained_state_dict = pretrained.state_dict() - - backbone_state_dict.update(pretrained_state_dict) - self.backbone.load_state_dict(backbone_state_dict) - - if self.inference_mode: - for param in self.backbone.parameters(): - param.requires_grad = False - - def set_prediction_mode(self): - self.inference_mode = True - self.eval() - - def forward(self, x, additional_features=None): - with ExitStack() as stack: - if self.inference_mode: - stack.enter_context(paddle.no_grad()) - - c1, _, c3, c4 = self.backbone(x, additional_features) - c1 = self.skip_project(c1) - - x = self.aspp(c4) - x = F.interpolate(x, c1.shape[2:], mode='bilinear', align_corners=True) - x = paddle.concat((x, c1), axis=1) - x = self.head(x) - - return x, - - -class _SkipProject(nn.Layer): - def __init__(self, in_channels, out_channels, norm_layer=nn.BatchNorm2D): - super(_SkipProject, self).__init__() - - self.skip_project = nn.Sequential( - nn.Conv2D(in_channels, out_channels, kernel_size=1, bias_attr=False), - norm_layer(out_channels), - nn.ReLU() - ) - - def forward(self, x): - return self.skip_project(x) - - -class _DeepLabHead(nn.Layer): - def __init__(self, out_channels, in_channels, mid_channels=256, norm_layer=nn.BatchNorm2D): - 
super(_DeepLabHead, self).__init__() - - self.block = nn.Sequential( - SeparableConv2d(in_channels=in_channels, out_channels=mid_channels, dw_kernel=3, - dw_padding=1, norm_layer=norm_layer), - SeparableConv2d(in_channels=mid_channels, out_channels=mid_channels, dw_kernel=3, - dw_padding=1, norm_layer=norm_layer), - nn.Conv2D(in_channels=mid_channels, out_channels=out_channels, kernel_size=1) - ) - - def forward(self, x): - return self.block(x) - - -class _ASPP(nn.Layer): - def __init__(self, in_channels, atrous_rates, out_channels=256, - project_dropout=0.5, norm_layer=nn.BatchNorm2D): - super(_ASPP, self).__init__() - - b0 = nn.Sequential( - nn.Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias_attr=False), - norm_layer(out_channels), - nn.ReLU() - ) - - rate1, rate2, rate3 = tuple(atrous_rates) - b1 = _ASPPConv(in_channels, out_channels, rate1, norm_layer) - b2 = _ASPPConv(in_channels, out_channels, rate2, norm_layer) - b3 = _ASPPConv(in_channels, out_channels, rate3, norm_layer) - b4 = _AsppPooling(in_channels, out_channels, norm_layer=norm_layer) - - self.concurent = nn.LayerList([b0, b1, b2, b3, b4]) - - project = [ - nn.Conv2D(in_channels=5 * out_channels, out_channels=out_channels, - kernel_size=1, bias_attr=False), - norm_layer(out_channels), - nn.ReLU() - ] - if project_dropout > 0: - project.append(nn.Dropout(project_dropout)) - self.project = nn.Sequential(*project) - - def forward(self, x): - x = paddle.concat([block(x) for block in self.concurent], axis=1) - - return self.project(x) - - -class _AsppPooling(nn.Layer): - def __init__(self, in_channels, out_channels, norm_layer): - super(_AsppPooling, self).__init__() - - self.gap = nn.Sequential( - nn.AdaptiveAvgPool2D((1, 1)), - nn.Conv2D(in_channels=in_channels, out_channels=out_channels, - kernel_size=1, bias_attr=False), - norm_layer(out_channels), - nn.ReLU() - ) - - def forward(self, x): - pool = self.gap(x) - return F.interpolate(pool, x.shape[2:], mode='bilinear', align_corners=True) - - -def _ASPPConv(in_channels, out_channels, atrous_rate, norm_layer): - block = nn.Sequential( - nn.Conv2D(in_channels=in_channels, out_channels=out_channels, - kernel_size=3, padding=atrous_rate, - dilation=atrous_rate, bias_attr=False), - norm_layer(out_channels), - nn.ReLU() - ) - - return block diff --git a/contrib/EISeg/eiseg/model/modeling/resnet.py b/contrib/EISeg/eiseg/model/modeling/resnet.py deleted file mode 100644 index 9dc791ac75..0000000000 --- a/contrib/EISeg/eiseg/model/modeling/resnet.py +++ /dev/null @@ -1,45 +0,0 @@ -import paddle.nn as nn -from .resnetv1b import resnet18_v1b, resnet34_v1b, resnet50_v1s, resnet101_v1s, resnet152_v1s - - -class ResNetBackbone(nn.Layer): - def __init__(self, backbone='resnet50', pretrained_base=True, dilated=True, **kwargs): - super(ResNetBackbone, self).__init__() - - if backbone == 'resnet34': - pretrained = resnet34_v1b(pretrained=pretrained_base, dilated=dilated, **kwargs) - elif backbone == 'resnet50': - pretrained = resnet50_v1s(pretrained=pretrained_base, dilated=dilated, **kwargs) - elif backbone == 'resnet101': - pretrained = resnet101_v1s(pretrained=pretrained_base, dilated=dilated, **kwargs) - elif backbone == 'resnet152': - pretrained = resnet152_v1s(pretrained=pretrained_base, dilated=dilated, **kwargs) - else: - raise RuntimeError(f'unknown backbone: {backbone}') - - self.conv1 = pretrained.conv1 - self.bn1 = pretrained.bn1 - self.relu = pretrained.relu - self.maxpool = pretrained.maxpool - self.layer1 = pretrained.layer1 - self.layer2 = 
pretrained.layer2 - self.layer3 = pretrained.layer3 - self.layer4 = pretrained.layer4 - - def forward(self, x, additional_features=None): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - - if additional_features is not None: - x = x + nn.functional.pad(additional_features, - [0, 0, 0, 0, 0, x.shape(1) - additional_features.shape(1)], - mode='constant', value=0) - x = self.maxpool(x) - c1 = self.layer1(x) - c2 = self.layer2(c1) - c3 = self.layer3(c2) - c4 = self.layer4(c3) - - return c1, c2, c3, c4 - diff --git a/contrib/EISeg/eiseg/model/modifiers.py b/contrib/EISeg/eiseg/model/modifiers.py deleted file mode 100644 index 18083f7f94..0000000000 --- a/contrib/EISeg/eiseg/model/modifiers.py +++ /dev/null @@ -1,10 +0,0 @@ - -class LRMult(object): - def __init__(self, lr_mult=1.): - self.lr_mult = lr_mult - - def __call__(self, m): - if getattr(m, 'weight', None) is not None: - m.weight.lr_mult = self.lr_mult - if getattr(m, 'bias', None) is not None: - m.bias.lr_mult = self.lr_mult \ No newline at end of file diff --git a/contrib/EISeg/eiseg/models.py b/contrib/EISeg/eiseg/models.py deleted file mode 100644 index 038b7a05d7..0000000000 --- a/contrib/EISeg/eiseg/models.py +++ /dev/null @@ -1,62 +0,0 @@ -import os.path as osp - -import paddle - -from model.is_hrnet_model import HRNetModel -# from model.is_deeplab_model import DeeplabModel -# from util import model_path - -here = osp.dirname(osp.abspath(__file__)) - - -class HRNet18_OCR48: - name = "HRNet18_OCR48" - - def load_params(self, params_path): - model = HRNetModel( - width=18, - ocr_width=48, - small=True, - with_aux_output=True, - use_rgb_conv=False, - use_leaky_relu=True, - use_disks=True, - with_prev_mask=True, - norm_radius=5, - cpu_dist_maps=False - ) - para_state_dict = paddle.load(params_path) - model.set_dict(para_state_dict) - model.eval() - return model - - -class HRNet18_OCR64: - name = "HRNet18_OCR64" - - def load_params(self, params_path): - model = HRNetModel( - width=18, - ocr_width=64, - small=False, - with_aux_output=True, - use_leaky_relu=True, - use_rgb_conv=False, - use_disks=True, - norm_radius=5, - with_prev_mask=True, - cpu_dist_maps=False # 目前打包cython有些问题,先默认用False - ) - para_state_dict = paddle.load(params_path) - model.set_dict(para_state_dict) - model.eval() - return model - - -models = [HRNet18_OCR48(), HRNet18_OCR64()] - - -def findModelbyName(model_name): - for idx, mt in enumerate(models): - if model_name == mt.name: - return models[idx], idx \ No newline at end of file diff --git a/contrib/EISeg/eiseg/resource/About.png b/contrib/EISeg/eiseg/resource/About.png deleted file mode 100644 index be833f1251..0000000000 Binary files a/contrib/EISeg/eiseg/resource/About.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/AutoSave.png b/contrib/EISeg/eiseg/resource/AutoSave.png deleted file mode 100644 index 552e970862..0000000000 Binary files a/contrib/EISeg/eiseg/resource/AutoSave.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/ChangeLabelPath.png b/contrib/EISeg/eiseg/resource/ChangeLabelPath.png deleted file mode 100644 index 2c05aee768..0000000000 Binary files a/contrib/EISeg/eiseg/resource/ChangeLabelPath.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Clear.png b/contrib/EISeg/eiseg/resource/Clear.png deleted file mode 100644 index 2c7a0ebebd..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Clear.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/ClearLabel.png b/contrib/EISeg/eiseg/resource/ClearLabel.png deleted file 
mode 100644 index 27c7b0ae56..0000000000 Binary files a/contrib/EISeg/eiseg/resource/ClearLabel.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Close.png b/contrib/EISeg/eiseg/resource/Close.png deleted file mode 100644 index 27c32940d4..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Close.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/End.png b/contrib/EISeg/eiseg/resource/End.png deleted file mode 100644 index 9c6adb4abb..0000000000 Binary files a/contrib/EISeg/eiseg/resource/End.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/ExportLabel.png b/contrib/EISeg/eiseg/resource/ExportLabel.png deleted file mode 100644 index ec0374d84b..0000000000 Binary files a/contrib/EISeg/eiseg/resource/ExportLabel.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/File.png b/contrib/EISeg/eiseg/resource/File.png deleted file mode 100644 index e2dc890aa1..0000000000 Binary files a/contrib/EISeg/eiseg/resource/File.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/ImportLabel.png b/contrib/EISeg/eiseg/resource/ImportLabel.png deleted file mode 100644 index c868978856..0000000000 Binary files a/contrib/EISeg/eiseg/resource/ImportLabel.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Label.png b/contrib/EISeg/eiseg/resource/Label.png deleted file mode 100644 index 5523086a44..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Label.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Model.png b/contrib/EISeg/eiseg/resource/Model.png deleted file mode 100644 index 9188aa0aa7..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Model.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/N2.png b/contrib/EISeg/eiseg/resource/N2.png deleted file mode 100644 index 47a53c164d..0000000000 Binary files a/contrib/EISeg/eiseg/resource/N2.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Next.png b/contrib/EISeg/eiseg/resource/Next.png deleted file mode 100644 index a96ebaaaef..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Next.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Ok.png b/contrib/EISeg/eiseg/resource/Ok.png deleted file mode 100644 index bcd3fac946..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Ok.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/OpenFolder.png b/contrib/EISeg/eiseg/resource/OpenFolder.png deleted file mode 100644 index 4927008af8..0000000000 Binary files a/contrib/EISeg/eiseg/resource/OpenFolder.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/OpenImage.png b/contrib/EISeg/eiseg/resource/OpenImage.png deleted file mode 100644 index 1c221ad54d..0000000000 Binary files a/contrib/EISeg/eiseg/resource/OpenImage.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/OtherSave.png b/contrib/EISeg/eiseg/resource/OtherSave.png deleted file mode 100644 index 7ee7999a6d..0000000000 Binary files a/contrib/EISeg/eiseg/resource/OtherSave.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Prev.png b/contrib/EISeg/eiseg/resource/Prev.png deleted file mode 100644 index 0c90e1742e..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Prev.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/RecentDocuments.png b/contrib/EISeg/eiseg/resource/RecentDocuments.png deleted file mode 100644 index 684eb3c515..0000000000 Binary files a/contrib/EISeg/eiseg/resource/RecentDocuments.png and /dev/null differ diff --git 
a/contrib/EISeg/eiseg/resource/Redo.png b/contrib/EISeg/eiseg/resource/Redo.png deleted file mode 100644 index 5bf985547c..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Redo.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Save.png b/contrib/EISeg/eiseg/resource/Save.png deleted file mode 100644 index 183bf12183..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Save.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Shortcut.png b/contrib/EISeg/eiseg/resource/Shortcut.png deleted file mode 100644 index 3888ae0a5f..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Shortcut.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Undo.png b/contrib/EISeg/eiseg/resource/Undo.png deleted file mode 100644 index f9de006217..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Undo.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/resource/Use.png b/contrib/EISeg/eiseg/resource/Use.png deleted file mode 100644 index e727921323..0000000000 Binary files a/contrib/EISeg/eiseg/resource/Use.png and /dev/null differ diff --git a/contrib/EISeg/eiseg/run.py b/contrib/EISeg/eiseg/run.py deleted file mode 100644 index d91b76b99d..0000000000 --- a/contrib/EISeg/eiseg/run.py +++ /dev/null @@ -1,14 +0,0 @@ -import sys - -from qtpy.QtWidgets import QApplication # 导入PyQt相关模块 -from app import APP_EISeg # 导入带槽的界面 - - -def main(): - app = QApplication(sys.argv) - myWin = APP_EISeg() # 创建对象 - myWin.showMaximized() # 全屏显示窗口 - # 加载近期模型 - QApplication.processEvents() - myWin.load_recent_params() - sys.exit(app.exec_()) \ No newline at end of file diff --git a/contrib/EISeg/eiseg/train/README.md b/contrib/EISeg/eiseg/train/README.md deleted file mode 100644 index e2154bdede..0000000000 --- a/contrib/EISeg/eiseg/train/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# 训练EISeg可用的自定义模型 - -目前已经可以通过简单的配置完成模型训练了,但其中有些设置还不能通过配置文件进行修改。 - -## 一、数据组织 - -在需要训练自己的数据集时,目前需要将数据集构造为如下格式,直接放在datasets文件夹中。文件名可以根据要求来进行设置,只需要在配置文件中设定好即可,图像和标签与平时使用的分割图像的用法相同。 - -``` -datasets - | - ├── train_data - | ├── img - | | └── filename_1.jpg - | └── gt - | └── filename_1.png - | - └── eval_data - ├── img - | └── filename_1.jpg - └── gt - └── filename_1.png -``` - -## 二、训练 - -直接运行ritm_train.py即可开始训练。 - -```python -%cd train -! python ritm_train.py --config train_config.yaml -``` - -目前一些简单的参数已经可以在yaml配置文件中进行自定义设置,不过现阶段仍然不够灵活,可能出现各种问题。 - -``` -iters: 100000 # 训练轮数 -batch_size: 16 # bs大小 -save_interval: 1000 # 保存间隔 -log_iters: 10 # 打印log的间隔 -worker: 4 # 子进程数 -save_dir: model_output # 保存路径 -use_vdl: False # 是否使用vdl - -dataset: - dataset_path: EISeg/train/datasets # 数据集所在路径 - image_name: img # 图像文件夹的名称 - label_name: gt # 标签文件夹的名称 - -train_dataset: # 训练数据 - crop_size: [320, 480] # 裁剪大小 - folder_name: train_data # 训练数据文件夹的名称 - -val_dataset: # 验证数据 - folder_name: val_data # 验证数据文件夹的名称 - -optimizer: - type: adam # 优化器,目前仅可以选择‘adam’和‘sgd’ - -learning_rate: - value_1: 5e-5 # 需要设置两个学习率 - value_2: 5e-6 - decay: - type: poly # 学习率衰减,目前仅支持‘poly’,可以修改下面的参数 - steps: 1000 - power: 0.9 - end_lr: 0.0 - -model: - type: deeplab # 模型名称,目前支持‘hrnet’、‘deeplab’以及‘shufflenet’ - backbone: resnet18 # 下面的参数是模型对应的参数,可在源码中查看 - is_ritm: True - weights: None # 加载权重的路径 -``` - - - -### * 说明 - -1. 
这里有个坑,数据不能有没有标签的纯背景,这样找不到正样点训练就会卡住,并且还不报错。 - diff --git a/contrib/EISeg/eiseg/train/ritm_train.py b/contrib/EISeg/eiseg/train/ritm_train.py deleted file mode 100644 index 325a811cec..0000000000 --- a/contrib/EISeg/eiseg/train/ritm_train.py +++ /dev/null @@ -1,408 +0,0 @@ -import os -import sys -import random -import argparse -from easydict import EasyDict as edict -import time - -import matplotlib.pyplot as plt -import paddle -import paddle.nn as nn -import paddleseg.transforms as T -from paddleseg.utils import logger, get_sys_env, logger -from albumentations import ( - Compose, - ShiftScaleRotate, - PadIfNeeded, - RandomCrop, - RGBShift, - RandomBrightnessContrast, - RandomRotate90, - HorizontalFlip, -) - -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) - -from model.model import ( - get_hrnet_model, - DistMapsHRNetModel, - get_deeplab_model, - get_shufflenet_model, -) -from model.modeling.hrnet_ocr import HighResolutionNet -from model.loss import * -from data.points_sampler import MultiPointSampler -from data.mdiy import MyDataset -from util.config import cfgData -from util.util import * - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", - dest="cfg", - type=str, - default="./train_config.yaml", - help="The config file.", - ) - return parser.parse_args() - - -def main(): - env_info = get_sys_env() - info = ["{}: {}".format(k, v) for k, v in env_info.items()] - info = "\n".join( - ["", format("Environment Information", "-^48s")] + info + ["-" * 48] - ) - logger.info(info) - place = ( - "gpu" - if env_info["Paddle compiled with cuda"] and env_info["GPUs used"] - else "cpu" - ) - paddle.set_device(place) - nranks = paddle.distributed.ParallelEnv().nranks - local_rank = paddle.distributed.ParallelEnv().local_rank - - cfg = cfgData(parse_args().cfg) - model_cfg = edict() - model_cfg.input_normalization = { - "mean": [0.5, 0.5, 0.5], - "std": [1, 1, 1], - } - model_cfg.num_max_points = 10 - model_cfg.input_transform = T.Compose( - [ - T.Normalize( - mean=model_cfg.input_normalization["mean"], - std=model_cfg.input_normalization["std"], - ) - ], - to_rgb=False, - ) - nn.initializer.set_global_initializer( - nn.initializer.Normal(), nn.initializer.Constant() - ) - models = cfg.get("model") - if models.get("type") == "deeplab": - model = get_deeplab_model( - backbone=models.get("backbone"), is_ritm=models.get("is_ritm") - ) - elif models.get("type") == "hrnet": - model = get_hrnet_model( - width=models.get("width"), - ocr_width=models.get("ocr_width"), - with_aux_output=models.get("with_aux_output"), - is_ritm=models.get("is_ritm"), - ) - elif models.get("type") == "shufflenet": - model = get_shufflenet_model() - if models.get("weights") != "None": - model.load_weights(models.get("weights")) - backbone_params, other_params = model.get_trainable_params() - - if nranks > 1: - if ( - not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized() - ): - paddle.distributed.init_parallel_env() - ddp_net = paddle.DataParallel(model) - else: - ddp_net = paddle.DataParallel(model) - train( - ddp_net, - cfg, - model_cfg, - backbone_params=backbone_params, - other_params=other_params, - ) - else: - train( - model, - cfg, - model_cfg, - backbone_params=backbone_params, - other_params=other_params, - ) - - -def train(model, cfg, model_cfg, backbone_params=None, other_params=None): - local_rank = paddle.distributed.ParallelEnv().local_rank - - max_iters = cfg.get("iters") - save_dir = cfg.get("save_dir") - batch_size = 
cfg.get("batch_size") if cfg.get("batch_size") > 1 else 1 - val_batch_size = batch_size - input_normalization = model_cfg.input_normalization - crop_size = cfg.get("train_dataset").get("crop_size") - log_iters = cfg.get("log_iters") - save_interval = cfg.get("save_interval") - num_masks = 1 - - train_augmentator = Compose( - [ - UniformRandomResize(scale_range=(0.75, 1.40)), - HorizontalFlip(), - PadIfNeeded(min_height=crop_size[0], min_width=crop_size[1], border_mode=0), - RandomCrop(*crop_size), - # RandomBrightnessContrast( - # brightness_limit=(-0.25, 0.25), contrast_limit=(-0.15, 0.4), p=0.75 - # ), - # RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=0.75), - ], - p=1.0, - ) - val_augmentator = Compose( - [ - PadIfNeeded(min_height=crop_size[0], min_width=crop_size[1], border_mode=0), - RandomCrop(*crop_size), - ], - p=1.0, - ) - - def scale_func(image_shape): - return random.uniform(0.75, 1.25) - - points_sampler = MultiPointSampler( - model_cfg.num_max_points, - prob_gamma=0.7, - merge_objects_prob=0.15, - max_num_merged_objects=2, - ) - trainset = MyDataset( - dataset_path=cfg.get("dataset").get("dataset_path"), - folder_name=cfg.get("train_dataset").get("folder_name"), - images_dir_name=cfg.get("dataset").get("image_name"), - masks_dir_name=cfg.get("dataset").get("label_name"), - num_masks=num_masks, - augmentator=train_augmentator, - points_from_one_object=False, - input_transform=model_cfg.input_transform, - min_object_area=80, - keep_background_prob=0.0, - image_rescale=scale_func, - points_sampler=points_sampler, - samples_scores_path=None, - samples_scores_gamma=1.25, - ) - valset = MyDataset( - dataset_path=cfg.get("dataset").get("dataset_path"), - folder_name=cfg.get("val_dataset").get("folder_name"), - images_dir_name=cfg.get("dataset").get("image_name"), - masks_dir_name=cfg.get("dataset").get("label_name"), - augmentator=val_augmentator, - num_masks=num_masks, - points_from_one_object=False, - input_transform=model_cfg.input_transform, - min_object_area=80, - image_rescale=scale_func, - points_sampler=points_sampler, - ) - batch_sampler = paddle.io.DistributedBatchSampler( - trainset, batch_size=batch_size, shuffle=True, drop_last=True - ) - loader = paddle.io.DataLoader( - trainset, - batch_sampler=batch_sampler, - return_list=True, - ) - val_batch_sampler = paddle.io.DistributedBatchSampler( - valset, batch_size=batch_size, shuffle=True, drop_last=True - ) - val_loader = paddle.io.DataLoader( - valset, - batch_sampler=val_batch_sampler, - return_list=True, - ) - - if cfg.get("use_vdl"): - from visualdl import LogWriter - - log_writer = LogWriter(save_dir) - - iters_per_epoch = len(batch_sampler) - - opt = None - if cfg.get("optimizer").get("type") == "adam": - opt = paddle.optimizer.Adam - elif cfg.get("optimizer").get("type") == "sgd": - opt = paddle.optimizer.SGD - else: - raise ValueError("Opt only have adam or sgd now.") - lr = None - if cfg.get("learning_rate").get("decay").get("type") == "poly": - lr = paddle.optimizer.lr.PolynomialDecay - else: - raise ValueError("Lr only have poly now.") - optimizer1 = opt( - learning_rate=lr( - float(cfg.get("learning_rate").get("value_1")), - decay_steps=cfg.get("learning_rate").get("decay").get("steps"), - end_lr=cfg.get("learning_rate").get("decay").get("end_lr"), - power=cfg.get("learning_rate").get("decay").get("power"), - ), - parameters=other_params, - ) - optimizer2 = opt( - learning_rate=lr( - float(cfg.get("learning_rate").get("value_1")), - 
decay_steps=cfg.get("learning_rate").get("decay").get("steps"), - end_lr=cfg.get("learning_rate").get("decay").get("end_lr"), - power=cfg.get("learning_rate").get("decay").get("power"), - ), - parameters=backbone_params, - ) - instance_loss = NormalizedFocalLossSigmoid(alpha=0.5, gamma=2) - instance_aux_loss = SigmoidBinaryCrossEntropyLoss() - model.train() - # with open('mobilenet_model.txt', 'w') as f: - # for keys, values in model.state_dict().items(): - # f.write(keys +'\t'+str(values.shape)+"\n") - iters = 0 - avg_loss = 0.0 - while iters < max_iters: - for data in loader: - tic = time.time() - iters += 1 - # print("begin ", iters) - if iters > max_iters: - break - if len(data) == 3: - images, points, masks = data - else: - images, points = data - masks = None - if masks is not None: - batch_size, num_points, c, h, w = masks.shape - masks = masks.reshape([batch_size * num_points, c, h, w]) - - # print(points.numpy()) - # output = batch_forward(model, images, masks, points) - # - # img = images.numpy()[0] - # img = np.moveaxis(img, 0, 2) - # print(img.shape) - # print(img.max(), img.min()) - # plt.imshow(((img + 0.5) * 255).astype("uint8")) - # plt.show() - # # cv2.imwrite("img.png", (img + 0.5) * 255) - # - # print(masks.numpy().shape) - # mask = masks.numpy()[0] - # mask = np.moveaxis(mask, 0, 2) - # cv2.imwrite("mask.png", mask * 255) - - # output = model(images, points) - # print('instance', output['instances']) - # print('mask', masks) - - loss = instance_loss(output["instances"], masks) - if "instances_aux" in output.keys(): - aux_loss = instance_aux_loss(output["instances_aux"], masks) - total_loss = loss + 0.4 * aux_loss - else: - total_loss = loss - avg_loss += total_loss.numpy()[0] - total_loss.backward() - optimizer1.step() - optimizer2.step() - lr = optimizer1.get_lr() - if isinstance(optimizer1._learning_rate, paddle.optimizer.lr.LRScheduler): - optimizer1._learning_rate.step() - if isinstance(optimizer2._learning_rate, paddle.optimizer.lr.LRScheduler): - optimizer2._learning_rate.step() - model.clear_gradients() - if iters % log_iters == 0: - avg_loss /= log_iters - logger.info( - "Epoch={}, Step={}/{}, loss={:.4f}, lr={}".format( - (iters - 1) // iters_per_epoch + 1, - iters, - max_iters, - avg_loss, - lr, - ) - ) - if cfg.get("use_vdl"): - log_writer.add_scalar("Train/loss", avg_loss, iters) - log_writer.add_scalar("Train/lr", lr, iters) - avg_loss = 0.0 - if (iters % save_interval == 0 or iters == max_iters) and local_rank == 0: - model.eval() - total_len = len(val_loader) - val_iou = 0 - for val_num, val_data in enumerate(val_loader): - if len(data) == 3: - val_images, val_points, val_masks = val_data - else: - val_images, val_points = val_data - val_masks = None - if val_masks is not None: - ( - val_batch_size, - val_num_points, - val_c, - val_h, - val_w, - ) = val_masks.shape - val_masks = val_masks.reshape( - [val_batch_size * val_num_points, val_c, val_h, val_w] - ) - val_output = batch_forward( - model, val_images, val_masks, val_points, is_train=False - )["instances"] - # val_output = model(val_images, val_points)['instances'] - # print('max', paddle.max(val_output)) - # print('output shape', val_output.shape) - val_output = nn.functional.interpolate( - val_output, - mode="bilinear", - align_corners=True, - size=val_masks.shape[2:], - ) - val_output = val_output > 0.5 - iter_iou = get_iou(val_masks.numpy(), val_output.numpy()) - val_iou += iter_iou - logger.info( - "mean iou of iter {} is {}".format(iters, val_iou / total_len) - ) - - if cfg.get("use_vdl"): - 
log_writer.add_scalar("Eval/miou", val_iou / total_len, iters) - - current_save_dir = os.path.join(save_dir, "iter_{}".format(iters)) - - if not os.path.isdir(current_save_dir): - os.makedirs(current_save_dir) - paddle.save( - model.state_dict(), os.path.join(current_save_dir, "model.pdparams") - ) - model.train() - toc = time.time() - print("ETA: ", toc - tic, (toc - tic) / 60 / 60 * (max_iters - iters)) - # print("end", iters) - - -def batch_forward(model, image, gt_mask, points, is_train=True): - orig_image, orig_gt_mask = image.clone(), gt_mask.clone() - prev_output = paddle.zeros_like(image, dtype="float32")[:, :1, ::] - # last_click_indx = None - num_iters = random.randint(1, 3) - if is_train: - model.eval() - with paddle.no_grad(): - for click_indx in range(num_iters): - net_input = paddle.concat([image, prev_output], axis=1) - prev_output = model(net_input, points)["instances"] - prev_output = nn.functional.sigmoid(prev_output) - points = get_next_points(prev_output, orig_gt_mask, points, click_indx + 1) - if is_train: - model.train() - net_input = paddle.concat([image, prev_output], axis=1) - output = model(net_input, points) - return output - - -if __name__ == "__main__": - main() diff --git a/contrib/EISeg/eiseg/train/train_config.yaml b/contrib/EISeg/eiseg/train/train_config.yaml deleted file mode 100644 index 2212119194..0000000000 --- a/contrib/EISeg/eiseg/train/train_config.yaml +++ /dev/null @@ -1,38 +0,0 @@ -iters: 4000 -batch_size: 1 -save_interval: 10 -log_iters: 1 -worker: 0 -save_dir: model_output -use_vdl: False - -dataset: - dataset_path: /home/lin/Desktop/aorta/private/imgs/二院/ - image_name: JPEGImages - label_name: new - -train_dataset: - crop_size: [512, 512] - folder_name: train - -val_dataset: - folder_name: eval - -optimizer: - type: adam - -learning_rate: - value_1: 5e-3 - value_2: 5e-6 - decay: - type: poly - steps: 1000 - power: 0.9 - end_lr: 0 - -model: - # type: shufflenet - type: deeplab - backbone: resnet18 - is_ritm: True - weights: None diff --git a/contrib/EISeg/eiseg/ui.py b/contrib/EISeg/eiseg/ui.py deleted file mode 100644 index 211ea53437..0000000000 --- a/contrib/EISeg/eiseg/ui.py +++ /dev/null @@ -1,377 +0,0 @@ -from qtpy import QtCore, QtGui, QtWidgets -from qtpy.QtCore import Qt -from qtpy.QtWidgets import QGraphicsView - -# from models import models -from functools import partial - -from models import models - -__APPNAME__ = "EISeg 0.1.5" - - -class Canvas(QGraphicsView): - clickRequest = QtCore.Signal(int, int, bool) - - def __init__(self, *args): - super(Canvas, self).__init__(*args) - self.setTransformationAnchor(QGraphicsView.NoAnchor) - self.setResizeAnchor(QGraphicsView.NoAnchor) - self.point = QtCore.QPoint(0, 0) - self.middle_click = False - self.zoom_all = 1 - - def wheelEvent(self, event): - if event.modifiers() & QtCore.Qt.ControlModifier: - print(event.angleDelta().x(), event.angleDelta().y()) - # self.zoom += event.angleDelta().y() / 2880 - zoom = 1 + event.angleDelta().y() / 2880 - self.zoom_all *= zoom - oldPos = self.mapToScene(event.pos()) - if self.zoom_all >= 0.02 and self.zoom_all <= 50: # 限制缩放的倍数 - # print(self.zoom_all) - self.scale(zoom, zoom) - newPos = self.mapToScene(event.pos()) - delta = newPos - oldPos - self.translate(delta.x(), delta.y()) - event.ignore() - else: - super(Canvas, self).wheelEvent(event) - - def mousePressEvent(self, ev): - print("view pos", ev.pos().x(), ev.pos().y()) - print("scene pos", self.mapToScene(ev.pos())) - pos = self.mapToScene(ev.pos()) - if ev.buttons() in [Qt.LeftButton, 
Qt.RightButton]: - self.clickRequest.emit(pos.x(), pos.y(), ev.buttons() == Qt.LeftButton) - elif ev.buttons() == Qt.MiddleButton: - self.middle_click = True - self._startPos = ev.pos() - - def mouseMoveEvent(self, ev): - if self.middle_click and ( - self.horizontalScrollBar().isVisible() - or self.verticalScrollBar().isVisible() - ): - # 放大到出现滚动条才能拖动,避免出现抖动 - self._endPos = ev.pos() / self.zoom_all - self._startPos / self.zoom_all - # 这儿不写为先减后除,这样会造成速度不一致 - self.point = self.point + self._endPos - self._startPos = ev.pos() - print("move", self._endPos.x(), self._endPos.y()) - self.translate(self._endPos.x(), self._endPos.y()) - - def mouseReleaseEvent(self, ev): - if ev.button() == Qt.MiddleButton: - self.middle_click = False - - -class Ui_Help(object): - def setupUi(self, Dialog): - Dialog.setObjectName("Dialog") - Dialog.setWindowTitle("Help") - Dialog.resize(650, 560) - Dialog.setStyleSheet("background-color: rgb(255, 255, 255);") - horizontalLayout = QtWidgets.QHBoxLayout(Dialog) - horizontalLayout.setObjectName("horizontalLayout") - label = QtWidgets.QLabel(Dialog) - label.setText("") - # label.setPixmap(QtGui.QPixmap("EISeg/resources/shortkey.jpg")) - label.setObjectName("label") - horizontalLayout.addWidget(label) - QtCore.QMetaObject.connectSlotsByName(Dialog) - - -class Ui_EISeg(object): - def setupUi(self, MainWindow): - ## -- 主窗体设置 -- - MainWindow.setObjectName("MainWindow") - MainWindow.setMinimumSize(QtCore.QSize(1366, 768)) - MainWindow.setWindowTitle(__APPNAME__) - CentralWidget = QtWidgets.QWidget(MainWindow) - CentralWidget.setObjectName("CentralWidget") - MainWindow.setCentralWidget(CentralWidget) - ## ----- - ## -- 工具栏 -- - toolBar = QtWidgets.QToolBar(self) - sizePolicy = QtWidgets.QSizePolicy( - QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum - ) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(toolBar.sizePolicy().hasHeightForWidth()) - toolBar.setSizePolicy(sizePolicy) - toolBar.setMinimumSize(QtCore.QSize(0, 33)) - toolBar.setMovable(True) - toolBar.setAllowedAreas(QtCore.Qt.BottomToolBarArea | QtCore.Qt.TopToolBarArea) - toolBar.setObjectName("toolBar") - self.toolBar = toolBar - MainWindow.addToolBar(QtCore.Qt.TopToolBarArea, self.toolBar) - ## ----- - ## -- 状态栏 -- - self.statusbar = QtWidgets.QStatusBar(MainWindow) - self.statusbar.setObjectName("statusbar") - self.statusbar.setStyleSheet("QStatusBar::item {border: none;}") - MainWindow.setStatusBar(self.statusbar) - self.statusbar.addPermanentWidget(self.show_logo("eiseg/resource/Paddle.png")) - ## ----- - ## -- 图形区域 -- - ImageRegion = QtWidgets.QHBoxLayout(CentralWidget) - ImageRegion.setObjectName("ImageRegion") - # 滑动区域 - self.scrollArea = QtWidgets.QScrollArea(CentralWidget) - self.scrollArea.setWidgetResizable(True) - self.scrollArea.setObjectName("scrollArea") - ImageRegion.addWidget(self.scrollArea) - # 图形显示 - self.scene = QtWidgets.QGraphicsScene() - self.scene.addPixmap(QtGui.QPixmap()) - self.canvas = Canvas(self.scene, self) - sizePolicy = QtWidgets.QSizePolicy( - QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding - ) - self.canvas.setSizePolicy(sizePolicy) - self.canvas.setAlignment(QtCore.Qt.AlignCenter) - self.canvas.setAutoFillBackground(False) - self.canvas.setStyleSheet("background-color: White") - self.canvas.setObjectName("canvas") - self.scrollArea.setWidget(self.canvas) - ## ----- - ## -- 工作区 -- - self.dockWorker = QtWidgets.QDockWidget(MainWindow) - sizePolicy = QtWidgets.QSizePolicy( - 
QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred - ) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.dockWorker.sizePolicy().hasHeightForWidth()) - self.dockWorker.setSizePolicy(sizePolicy) - self.dockWorker.setMinimumSize(QtCore.QSize(71, 42)) - self.dockWorker.setWindowTitle("工作区") - self.dockWorker.setFeatures( - QtWidgets.QDockWidget.DockWidgetFloatable - | QtWidgets.QDockWidget.DockWidgetMovable - ) - self.dockWorker.setAllowedAreas( - QtCore.Qt.LeftDockWidgetArea | QtCore.Qt.RightDockWidgetArea - ) - self.dockWorker.setObjectName("dockWorker") - p_create_button = partial(self.create_button, CentralWidget) - # 设置区设置 - DockRegion = QtWidgets.QWidget() - DockRegion.setObjectName("DockRegion") - horizontalLayout = QtWidgets.QHBoxLayout(DockRegion) - horizontalLayout.setObjectName("horizontalLayout") - SetRegion = QtWidgets.QVBoxLayout() - SetRegion.setObjectName("SetRegion") - # 模型加载 - ModelRegion = QtWidgets.QVBoxLayout() - ModelRegion.setObjectName("ModelRegion") - labShowSet = self.create_text(CentralWidget, "labShowSet", "网络选择") - ModelRegion.addWidget(labShowSet) - combo = QtWidgets.QComboBox(self) - # for model in models: - # combo.addItem(model.name) - # 网络参数 - combo.addItems([m.name for m in models]) - self.comboModelSelect = combo - ModelRegion.addWidget(self.comboModelSelect) # 模型选择 - self.btnParamsSelect = p_create_button("btnParamsLoad", "加载网络参数", \ - "eiseg/resource/Model.png", "Ctrl+D") - ModelRegion.addWidget(self.btnParamsSelect) # 模型选择 - SetRegion.addLayout(ModelRegion) - SetRegion.setStretch(0, 1) - # 数据列表 - listRegion = QtWidgets.QVBoxLayout() - listRegion.setObjectName("listRegion") - labFiles = self.create_text(CentralWidget, "labFiles", "数据列表") - listRegion.addWidget(labFiles) - self.listFiles = QtWidgets.QListWidget(CentralWidget) - self.listFiles.setObjectName("listFiles") - listRegion.addWidget(self.listFiles) - # 标签列表 - labelListLab = self.create_text(CentralWidget, "labelListLab", "标签列表") - listRegion.addWidget(labelListLab) - # TODO: 改成 list widget - self.labelListTable = QtWidgets.QTableWidget(CentralWidget) - self.labelListTable.horizontalHeader().hide() - # 自适应填充 - self.labelListTable.horizontalHeader().setSectionResizeMode(QtWidgets.QHeaderView.ResizeToContents) - self.labelListTable.verticalHeader().hide() - self.labelListTable.setColumnWidth(0, 10) - # self.labelListTable.setMinimumWidth() - self.labelListTable.setObjectName("labelListTable") - listRegion.addWidget(self.labelListTable) - self.btnAddClass = p_create_button("btnAddClass", "添加标签", "eiseg/resource/Label.png") - listRegion.addWidget(self.btnAddClass) - SetRegion.addLayout(listRegion) - SetRegion.setStretch(1, 20) - # 滑块设置 - # 分割阈值 - p_create_slider = partial(self.create_slider, CentralWidget) - ShowSetRegion = QtWidgets.QVBoxLayout() - ShowSetRegion.setObjectName("ShowSetRegion") - self.sldThresh, SegShowRegion = p_create_slider( - "sldThresh", "labThresh", "分割阈值:" - ) - ShowSetRegion.addLayout(SegShowRegion) - ShowSetRegion.addWidget(self.sldThresh) - # 透明度 - self.sldOpacity, MaskShowRegion = p_create_slider( - "sldOpacity", "labOpacity", "标签透明度:" - ) - ShowSetRegion.addLayout(MaskShowRegion) - ShowSetRegion.addWidget(self.sldOpacity) - # 点大小 - self.sldClickRadius, PointShowRegion = p_create_slider( - "sldClickRadius", "labClickRadius", "点击可视化半径:", 3, 10, 1 - ) - ShowSetRegion.addLayout(PointShowRegion) - ShowSetRegion.addWidget(self.sldClickRadius) - SetRegion.addLayout(ShowSetRegion) - SetRegion.setStretch(2, 1) - 
# 保存 - self.btnSave = p_create_button("btnSave", "保存", "eiseg/resource/Save.png", "Ctrl+S") - SetRegion.addWidget(self.btnSave) - SetRegion.setStretch(3, 1) - # dock设置完成 - horizontalLayout.addLayout(SetRegion) - self.dockWorker.setWidget(DockRegion) - MainWindow.addDockWidget(QtCore.Qt.DockWidgetArea(2), self.dockWorker) - ## ----- - QtCore.QMetaObject.connectSlotsByName(MainWindow) - - ## 创建文本 - def create_text(self, parent, text_name=None, text_text=None): - text = QtWidgets.QLabel(parent) - if text_name is not None: - text.setObjectName(text_name) - if text_text is not None: - text.setText(text_text) - return text - - ## 创建按钮 - def create_button(self, parent, btn_name, btn_text, ico_path=None, curt=None): - # 创建和设置按钮 - sizePolicy = QtWidgets.QSizePolicy( - QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed - ) - min_size = QtCore.QSize(0, 40) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - btn = QtWidgets.QPushButton(parent) - sizePolicy.setHeightForWidth(btn.sizePolicy().hasHeightForWidth()) - btn.setSizePolicy(sizePolicy) - btn.setMinimumSize(min_size) - btn.setObjectName(btn_name) - if ico_path is not None: - btn.setIcon(QtGui.QIcon(ico_path)) - btn.setText(btn_text) - if curt is not None: - btn.setShortcut(curt) - return btn - - ## 添加动作 - # def add_action(self, parent, act_name, act_text="", ico_path=None, short_cut=None): - # act = QtWidgets.QAction(parent) - # if ico_path is not None: - # icon = QtGui.QIcon() - # icon.addPixmap(QtGui.QPixmap(ico_path), QtGui.QIcon.Normal, QtGui.QIcon.Off) - # act.setIcon(icon) - # act.setObjectName(act_name) - # act.setText(act_text) - # if short_cut is not None: - # act.setShortcut(short_cut) - # return act - - ## 创建菜单按钮 - # def add_menu(self, parent, menu_name, menu_text, acts=None): - # menu = QtWidgets.QMenu(parent) - # menu.setObjectName(menu_name) - # menu.setTitle(menu_text) - # if acts is not None: - # for act in acts: - # new_act = self.add_action(parent, act[0], act[1], act[2], act[3]) - # menu.addAction(new_act) - # return menu - - ## 创建菜单栏 - # def create_menubar(self, parent, menus): - # menuBar = QtWidgets.QMenuBar(parent) - # menuBar.setGeometry(QtCore.QRect(0, 0, 800, 26)) - # menuBar.setObjectName("menuBar") - # for menu in menus: - # menuBar.addAction(menu.menuAction()) - # return menuBar - - # ## 创建工具栏 - # def create_toolbar(self, parent, acts): - # toolBar = QtWidgets.QToolBar(parent) - # sizePolicy = QtWidgets.QSizePolicy( - # QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Minimum - # ) - # sizePolicy.setHorizontalStretch(0) - # sizePolicy.setVerticalStretch(0) - # sizePolicy.setHeightForWidth(toolBar.sizePolicy().hasHeightForWidth()) - # toolBar.setSizePolicy(sizePolicy) - # toolBar.setMinimumSize(QtCore.QSize(0, 33)) - # toolBar.setMovable(True) - # toolBar.setAllowedAreas(QtCore.Qt.BottomToolBarArea | QtCore.Qt.TopToolBarArea) - # toolBar.setObjectName("toolBar") - # for act in acts: - # new_act = self.add_action(parent, act[0], act[1], act[2], act[3]) - # toolBar.addAction(new_act) - # return toolBar - - ## 显示Logo - def show_logo(self, logo_path): - labLogo = QtWidgets.QLabel() - sizePolicy = QtWidgets.QSizePolicy( - QtWidgets.QSizePolicy.Maximum, QtWidgets.QSizePolicy.Maximum - ) - labLogo.setSizePolicy(sizePolicy) - labLogo.setMaximumSize(QtCore.QSize(100, 33)) - labLogo.setPixmap(QtGui.QPixmap(logo_path)) - labLogo.setScaledContents(True) - labLogo.setObjectName("labLogo") - return labLogo - - ## 创建滑块区域 - def create_slider( - self, - parent, - sld_name, - text_name, - text, - 
default_value=5, - max_value=10, - text_rate=0.1, - ): - Region = QtWidgets.QHBoxLayout() - lab = self.create_text(parent, None, text) - Region.addWidget(lab) - labShow = self.create_text(parent, text_name, str(default_value * text_rate)) - Region.addWidget(labShow) - Region.setStretch(0, 1) - Region.setStretch(1, 10) - sld = QtWidgets.QSlider(parent) - sld.setMaximum(max_value) # 好像只能整数的,这里是扩大了10倍,1 . 10 - sld.setProperty("value", default_value) - sld.setOrientation(QtCore.Qt.Horizontal) - sld.setObjectName(sld_name) - sld.setStyleSheet( - """ - QSlider::sub-page:horizontal { - background: #9999F1 - } - - QSlider::handle:horizontal - { - background: #3334E3; - width: 12px; - border-radius: 4px; - } - """ - ) - sld.textLab = labShow - return sld, Region diff --git a/contrib/EISeg/eiseg/util/__init__.py b/contrib/EISeg/eiseg/util/__init__.py deleted file mode 100644 index 07c442bcd9..0000000000 --- a/contrib/EISeg/eiseg/util/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .qt import newAction, addActions, struct, newIcon -from .label import saveLabel, readLabel -from .config import parseConfigs, saveConfigs -from .network import model_path -from .colormap import ColorMask \ No newline at end of file diff --git a/contrib/EISeg/eiseg/util/colormap.py b/contrib/EISeg/eiseg/util/colormap.py deleted file mode 100644 index 44ae9ee03b..0000000000 --- a/contrib/EISeg/eiseg/util/colormap.py +++ /dev/null @@ -1,26 +0,0 @@ -import os.path as osp -import random - - -class ColorMask(object): - def __init__( - self, color_path, shuffle=False - ): - self.color_maps = [] - self.index = 0 - with open(color_path, "r") as f: - self.color_maps = f.readlines() - if shuffle: - random.shuffle(self.color_maps) - self.color_map_nums = len(self.color_maps) - - def get_color(self): - color = self.color_maps[self.index].strip() - self.index += 1 - if self.index == self.color_map_nums: - self.index = 0 - return self.to_list(color) - - def to_list(self, color): - r, g, b = color.split(",") - return [int(r), int(g), int(b)] diff --git a/contrib/EISeg/eiseg/util/cython/__init__.py b/contrib/EISeg/eiseg/util/cython/__init__.py deleted file mode 100644 index eb66bdbba8..0000000000 --- a/contrib/EISeg/eiseg/util/cython/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# noinspection PyUnresolvedReferences -from .dist_maps import get_dist_maps \ No newline at end of file diff --git a/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyx b/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyx deleted file mode 100644 index 779a7f02ad..0000000000 --- a/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyx +++ /dev/null @@ -1,63 +0,0 @@ -import numpy as np -cimport cython -cimport numpy as np -from libc.stdlib cimport malloc, free - -ctypedef struct qnode: - int row - int col - int layer - int orig_row - int orig_col - -@cython.infer_types(True) -@cython.boundscheck(False) -@cython.wraparound(False) -@cython.nonecheck(False) -def get_dist_maps(np.ndarray[np.float32_t, ndim=2, mode="c"] points, - int height, int width, float norm_delimeter): - cdef np.ndarray[np.float32_t, ndim=3, mode="c"] dist_maps = \ - np.full((2, height, width), 1e6, dtype=np.float32, order="C") - - cdef int *dxy = [-1, 0, 0, -1, 0, 1, 1, 0] - cdef int i, j, x, y, dx, dy - cdef qnode v - cdef qnode *q = malloc((4 * height * width + 1) * sizeof(qnode)) - cdef int qhead = 0, qtail = -1 - cdef float ndist - - for i in range(points.shape[0]): - x, y = round(points[i, 0]), round(points[i, 1]) - if x >= 0: - qtail += 1 - q[qtail].row = x - q[qtail].col = y - q[qtail].orig_row = x 
- q[qtail].orig_col = y - if i >= points.shape[0] / 2: - q[qtail].layer = 1 - else: - q[qtail].layer = 0 - dist_maps[q[qtail].layer, x, y] = 0 - - while qtail - qhead + 1 > 0: - v = q[qhead] - qhead += 1 - - for k in range(4): - x = v.row + dxy[2 * k] - y = v.col + dxy[2 * k + 1] - - ndist = ((x - v.orig_row)/norm_delimeter) ** 2 + ((y - v.orig_col)/norm_delimeter) ** 2 - if (x >= 0 and y >= 0 and x < height and y < width and - dist_maps[v.layer, x, y] > ndist): - qtail += 1 - q[qtail].orig_col = v.orig_col - q[qtail].orig_row = v.orig_row - q[qtail].layer = v.layer - q[qtail].row = x - q[qtail].col = y - dist_maps[v.layer, x, y] = ndist - - free(q) - return dist_maps diff --git a/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyxbld b/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyxbld deleted file mode 100644 index bd44517292..0000000000 --- a/contrib/EISeg/eiseg/util/cython/_get_dist_maps.pyxbld +++ /dev/null @@ -1,7 +0,0 @@ -import numpy - -def make_ext(modname, pyxfilename): - from distutils.extension import Extension - return Extension(modname, [pyxfilename], - include_dirs=[numpy.get_include()], - extra_compile_args=['-O3'], language='c++') diff --git a/contrib/EISeg/eiseg/util/cython/dist_maps.py b/contrib/EISeg/eiseg/util/cython/dist_maps.py deleted file mode 100644 index 8ffa1e3f25..0000000000 --- a/contrib/EISeg/eiseg/util/cython/dist_maps.py +++ /dev/null @@ -1,3 +0,0 @@ -import pyximport; pyximport.install(pyximport=True, language_level=3) -# noinspection PyUnresolvedReferences -from ._get_dist_maps import get_dist_maps \ No newline at end of file diff --git a/contrib/EISeg/eiseg/util/distributed.py b/contrib/EISeg/eiseg/util/distributed.py deleted file mode 100644 index 05306b4631..0000000000 --- a/contrib/EISeg/eiseg/util/distributed.py +++ /dev/null @@ -1,33 +0,0 @@ -import paddle -import paddle.distributed as dist -import numpy as np - -def get_rank(): - return dist.get_rank() - - -def synchronize(): - return dist.barrier() - - -def get_world_size(): - return dist.get_world_size() - - -def reduce_loss_dict(loss_dict): - world_size = get_world_size() - - if world_size < 2: - return loss_dict - with paddle.no_grad(): - keys = [] - losses = [] - - for k in loss_dict.keys(): - keys.append(k) - loss = dist.all_reduce(loss_dict[k].astype('float32')) / paddle.distributed.get_world_size() - losses.append(loss) - - reduced_losses = {k: v for k, v in zip(keys, losses)} - - return reduced_losses diff --git a/contrib/EISeg/eiseg/util/exp.py b/contrib/EISeg/eiseg/util/exp.py deleted file mode 100644 index c2d6d33cf1..0000000000 --- a/contrib/EISeg/eiseg/util/exp.py +++ /dev/null @@ -1,154 +0,0 @@ -import os -import sys -import shutil -import pprint -from pathlib import Path -from datetime import datetime -import paddle -import yaml -from easydict import EasyDict as edict -import filelock - -TMP_HOME = 'tmp' - -def init_experiment(args, model_name): - nranks = paddle.distributed.ParallelEnv().nranks - local_rank = paddle.distributed.ParallelEnv().local_rank - model_path = Path(args.model_path) - ftree = get_model_family_tree(model_path, model_name=model_name) - if ftree is None: - print('Models can only be located in the "models" directory in the root of the repository') - sys.exit(1) - - cfg = load_config(model_path) - update_config(cfg, args) - - experiments_path = Path(cfg.EXPS_PATH) - exp_parent_path = experiments_path / '/'.join(ftree) - lock = filelock.FileLock(os.path.join(experiments_path, '.tmp')) - - with lock: - if local_rank == 0: - exp_parent_path.mkdir(parents=True, 
exist_ok=True) - - if cfg.resume_exp: - exp_path = find_resume_exp(exp_parent_path, cfg.resume_exp) - else: - last_exp_indx = find_last_exp_indx(exp_parent_path) - exp_name = f'{last_exp_indx:03d}' - if cfg.exp_name: - exp_name += '_' + cfg.exp_name - exp_path = exp_parent_path / exp_name - exp_path.mkdir(parents=True) - - cfg.EXP_PATH = exp_path - cfg.CHECKPOINTS_PATH = exp_path / 'checkpoints' - cfg.VIS_PATH = exp_path / 'vis' - cfg.LOGS_PATH = exp_path / 'logs' - - cfg.LOGS_PATH.mkdir(exist_ok=True) - cfg.CHECKPOINTS_PATH.mkdir(exist_ok=True) - cfg.VIS_PATH.mkdir(exist_ok=True) - - dst_script_path = exp_path / (model_path.stem + datetime.strftime(datetime.today(), '_%Y-%m-%d-%H-%M-%S.py')) - shutil.copy(model_path, dst_script_path) - - if cfg.gpus != '': - gpu_ids = [int(id) for id in cfg.gpus.split(',')] - else: - gpu_ids = list(range(cfg.ngpus)) - cfg.gpus = ','.join([str(id) for id in gpu_ids]) - cfg.gpu_ids = gpu_ids - cfg.ngpus = len(gpu_ids) - cfg.multi_gpu = cfg.ngpus > 1 - - return cfg - - -def get_model_family_tree(model_path, terminate_name='train_script', model_name=None): - if model_name is None: - model_name = model_path.stem - - family_tree = [model_name] - for x in model_path.parents: - if x.stem == terminate_name: - break - family_tree.append(x.stem) - else: - return None - - return family_tree[::-1] - - -def find_last_exp_indx(exp_parent_path): - indx = 0 - for x in exp_parent_path.iterdir(): - if not x.is_dir(): - continue - - exp_name = x.stem - if exp_name[:3].isnumeric(): - indx = max(indx, int(exp_name[:3]) + 1) - - return indx - - -def find_resume_exp(exp_parent_path, exp_pattern): - candidates = sorted(exp_parent_path.glob(f'{exp_pattern}*')) - if len(candidates) == 0: - print(f'No experiments could be found that satisfies the pattern = "*{exp_pattern}"') - sys.exit(1) - elif len(candidates) > 1: - print('More than one experiment found:') - for x in candidates: - print(x) - sys.exit(1) - else: - exp_path = candidates[0] - print(f'Continue with experiment "{exp_path}"') - - return exp_path - - -def update_config(cfg, args): - for param_name, value in vars(args).items(): - if param_name.lower() in cfg or param_name.upper() in cfg: - continue - cfg[param_name] = value - - -def load_config(model_path): - model_name = model_path.stem - config_path = model_path.parent / (model_name + '.yml') - - if config_path.exists(): - cfg = load_config_file(config_path) - else: - cfg = dict() - - cwd = Path.cwd() - config_parent = config_path.parent.absolute() - while len(config_parent.parents) > 0: - config_path = config_parent / 'config.yml' - - if config_path.exists(): - local_config = load_config_file(config_path, model_name=model_name) - cfg.update({k: v for k, v in local_config.items() if k not in cfg}) - - if config_parent.absolute() == cwd: - break - config_parent = config_parent.parent - - return edict(cfg) - - -def load_config_file(config_path, model_name=None, return_edict=False): - with open(config_path, 'r') as f: - cfg = yaml.safe_load(f) - - if 'SUBCONFIGS' in cfg: - if model_name is not None and model_name in cfg['SUBCONFIGS']: - cfg.update(cfg['SUBCONFIGS'][model_name]) - del cfg['SUBCONFIGS'] - - return edict(cfg) if return_edict else cfg diff --git a/contrib/EISeg/eiseg/util/label.py b/contrib/EISeg/eiseg/util/label.py deleted file mode 100644 index 621128c141..0000000000 --- a/contrib/EISeg/eiseg/util/label.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import os.path as osp - - -def toint(seq): - for idx in range(len(seq)): - try: - seq[idx] = int(seq[idx]) - 
except ValueError: - pass - return seq - - -def saveLabel(labelList, path): - # labelList = [[1, "人", [0, 0, 0]], [2, "车", [128, 128, 128]]] - print("save label", labelList, path) - print(osp.exists(osp.dirname(path)), osp.dirname(path)) - if not path or len(path) == 0 or not osp.exists(osp.dirname(path)): - print("save label error") - return - with open(path, "w", encoding="utf-8") as f: - for l in labelList: - for idx in range(2): - print(l[idx], end=" ", file=f) - for idx in range(3): - print(l[2][idx], end=" ", file=f) - print(file=f) - - -# saveLabel("label.txt") - - -def readLabel(path): - if not path or len(path) == 0 or not osp.exists(path): - return [] - - with open(path, "r", encoding="utf-8") as f: - labels = f.readlines() - labelList = [] - for lab in labels: - lab = lab.replace("\n", "").strip(" ").split(" ") - if len(lab) != 2 and len(lab) != 5: - print("标签不合法") - continue - label = toint(lab[:2]) - label.append(toint(lab[2:])) - labelList.append(label) - print(labelList) - return labelList - - -# readLabel("label.txt") diff --git a/contrib/EISeg/eiseg/util/network.py b/contrib/EISeg/eiseg/util/network.py deleted file mode 100644 index c6ee2a84c8..0000000000 --- a/contrib/EISeg/eiseg/util/network.py +++ /dev/null @@ -1,45 +0,0 @@ -from pathlib import Path, PurePath -import wget -import hashlib - -# url = "http://182.61.31.110/model/hrnet18s_ocr48_human_f_007.pdparams" -# url = "http://localhost/hrnet18s_ocr48_human_f_007.pdparams" - - -def model_path(name, refresh=False): - local_path = Path.home() / Path(".EISeg/model", name, name + ".pdparams") - print(local_path) - - if local_path.exists(): - return str(local_path) - - def bar_custom(current, total, width=80): - print(current, total) - - def bar_dummy(current, total, width=80): - pass - - for f in local_path.parent.glob("*.tmp"): - f.unlink() - if not local_path.parent.exists(): - local_path.parent.mkdir() - - # base_url = "http://182.61.31.110" - base_url = "http://localhost" - param_url = f"{base_url}/model/{name}/{name}.pdparams" - md5_url = f"{base_url}/model/{name}/{name}.md5" - md5_path = local_path.parent / Path(name + ".md5") - wget.download(md5_url, str(md5_path), bar_dummy) - remote_md5 = md5_path.read_text() - - for _ in range(5): - wget.download(param_url, str(local_path), bar_custom) - local_md5 = hashlib.md5(local_path.read_bytes()).hexdigest() - if local_md5 == remote_md5: - md5_path.unlink() - return str(local_path) - print("error") - return None - - -# model_path("hrnet18s_ocr48_human_f_007") diff --git a/contrib/EISeg/pick2exe.py b/contrib/EISeg/pick2exe.py deleted file mode 100644 index 52d3b322e7..0000000000 --- a/contrib/EISeg/pick2exe.py +++ /dev/null @@ -1,7 +0,0 @@ -from qpt.executor import CreateExecutableModule - -if __name__ == "__main__": - module = CreateExecutableModule( - work_dir="./EISeg", launcher_py_path="./EISeg/exe.py", save_path="./out" - ) - module.make() diff --git a/contrib/EISeg/setup.py b/contrib/EISeg/setup.py deleted file mode 100644 index fbad5fbd0b..0000000000 --- a/contrib/EISeg/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -import pathlib -from setuptools import setup, find_packages -from Cython.Build import cythonize - -HERE = pathlib.Path(__file__).parent - -README = (HERE / "README.md").read_text(encoding="utf-8") - - -setup( - name="EISeg", - version="0.1.6", - description="交互式标注软件", - long_description=README, - long_description_content_type="text/markdown", - url="https://github.com/PaddleCV-SIG/EISeg", - author="Paddlecv-SIG", - author_email="linhandev@qq.com", - 
license="MIT", - classifiers=[ - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - ], - # ext_modules=cythonize(["EISeg/cython_dict/_get_dist_maps.pyx"]), - packages=find_packages(exclude=("test",)), - # packages=["EISeg"], - include_package_data=True, - install_requires=[ - "pyqt5", - "qtpy", - "opencv-python", - "scipy", - "paddleseg", - "albumentations", - "cython", - "pyyaml", - ], - entry_points={ - "console_scripts": [ - "eiseg=eiseg.run:main", - ] - }, -) diff --git a/contrib/EISeg/todo b/contrib/EISeg/todo deleted file mode 100644 index f9045dc0c8..0000000000 --- a/contrib/EISeg/todo +++ /dev/null @@ -1,4 +0,0 @@ -cython -zoomin需要,brs可以不要 -baseline可以不用,mask可以不用 -4个模型,大小 diff --git a/contrib/EISeg/tool/show_label.py b/contrib/EISeg/tool/show_label.py deleted file mode 100644 index f3da1ab57a..0000000000 --- a/contrib/EISeg/tool/show_label.py +++ /dev/null @@ -1,7 +0,0 @@ -import cv2 -import matplotlib.pyplot as plt - -img = cv2.imread("/home/lin/Desktop/label/467594346.png") -img = img * 255 -plt.imshow(img) -plt.show() diff --git a/contrib/HumanSeg/scripts/__init__.py b/contrib/HumanSeg/scripts/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/contrib/HumanSeg/README.md b/contrib/PP-HumanSeg/README.md similarity index 67% rename from contrib/HumanSeg/README.md rename to contrib/PP-HumanSeg/README.md index f424939526..207575ee41 100644 --- a/contrib/HumanSeg/README.md +++ b/contrib/PP-HumanSeg/README.md @@ -1,9 +1,8 @@ -# 人像分割PPSeg +# 人像分割PP-HumanSeg -人像分割是图像分割领域非常常见的应用,PaddleSeg推出了在大规模人像数据上训练的人像分割PPSeg模型,满足在服务端、移动端、Web端多种使用场景的需求。本教程提供从训练到部署的全流程应用指南,以及视频流人像分割、背景替换的实际效果体验。最新发布超轻量级人像分割模型,支持Web端、移动端场景的实时分割。 +人像分割是图像分割领域非常常见的应用,PaddleSeg推出了在大规模人像数据上训练的人像分割PP-HumanSeg模型,满足在服务端、移动端、Web端多种使用场景的需求。本教程提供从训练到部署的全流程应用指南,以及视频流人像分割、背景替换的实际效果体验。最新发布超轻量级人像分割模型,支持Web端、移动端场景的实时分割。 - -近期 **百度视频会议** 上线了虚拟背景功能,支持在网页端视频会议时进行背景切换和背景虚化。其中人像换背景模型采用我们的**超轻量级模型PPSeg-Lite**。欢迎前去[百度首页](https://www.baidu.com/)右下角体验效果! +近期 **百度视频会议** 上线了虚拟背景功能,支持在网页端视频会议时进行背景切换和背景虚化。其中人像换背景模型采用我们的**超轻量级模型PP-HumanSeg-Lite**。欢迎前去[百度首页](https://www.baidu.com/)右下角体验效果!

@@ -15,20 +14,20 @@ - [快速体验](#快速体验) - [视频流人像分割](#视频流人像分割) - [视频流背景替换](#视频流背景替换) -- [训练评估预测](#训练评估预测) +- [训练评估预测演示](#训练评估预测演示) - [模型导出](#模型导出) - [Web端部署](#Web端部署) - [移动端部署](#移动端部署) ## 人像分割模型 ### 通用人像分割(Generic Human Segmentation) -PPSeg开放了在大规模人像数据上训练的三个人像模型,满足服务端、移动端、Web端多种使用场景的需求。 +PP-HumanSeg开放了在大规模人像数据上训练的三个人像模型,满足服务端、移动端、Web端多种使用场景的需求。 | 模型名 | 模型说明 | Checkpoint | Inference Model | | --- | --- | --- | ---| -| PPSeg-Server | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/ResNet50, 输入大小(512, 512) |[ppseg_server_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip) | [ppseg_server_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip) | -| PPSeg-Mobile | 轻量级模型,适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) | [ppseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/fcn_hrnetw18_small_v1_humanseg_192x192.zip) | [ppseg_mobile_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax.zip) | -| PPSeg-Lite | 超轻量级模型,适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为百度自研模型,输入大小(192, 192) | [ppseg_lite_ckpt]() | [ppseg_lite_inference]() | +| PP-HumanSeg-Server | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/ResNet50, 输入大小(512, 512) |[pp_humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip) | [pp_humanseg_server_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip) | +| PP-HumanSeg-Mobile | 轻量级模型,适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) | [pp_humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/fcn_hrnetw18_small_v1_humanseg_192x192.zip) | [pp_humanseg_mobile_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax.zip) | +| PP-HumanSeg-Lite | 超轻量级模型,适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为百度自研模型,输入大小(192, 192) | [pp-humanseg_lite_ckpt]() | [pp-humanseg_lite_inference]() | NOTE: @@ -42,25 +41,25 @@ NOTE: | 模型名 |Input Size | FLOPS | Parameters | 计算耗时 | 模型大小 | |-|-|-|-|-|-| -| PPSeg-Server | 512x512 | 114G | 26.8M | 37.96ms | 103Mb | -| PPSeg-Mobile | 192x192 | 584M | 1.54M | 13.17ms | 5.9Mb | -| PPSeg-Lite | 192x192 | 121M | 137K | 10.51ms | 543Kb | +| PP-HumanSeg-Server | 512x512 | 114G | 26.8M | 37.96ms | 103Mb | +| PP-HumanSeg-Mobile | 192x192 | 584M | 1.54M | 13.17ms | 5.9Mb | +| PP-HumanSeg-Lite | 192x192 | 121M | 137K | 10.51ms | 543Kb | 测试环境:Nvidia Tesla V100单卡。 ### 半身像分割(Portrait Segmentation) -针对Portrait segmentation场景,PPSeg开放了半身像分割模型,该模型已应用于百度视频会议。 +针对Portrait segmentation场景,PP-HumanSeg开放了半身像分割模型,该模型已应用于百度视频会议。 | 模型名 | 模型说明 | Checkpoint | Inference Model | | --- | --- | --- | ---| -| PPSeg-Lite | 超轻量级模型,适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为百度自研模型,推荐输入大小(398,224) | [ppseg_lite_portrait_ckpt](https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224.tar.gz) | [ppseg_lite_portrait_inference](https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224_with_softmax.tar.gz) | +| PP-HumanSeg-Lite | 超轻量级模型,适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为百度自研模型,推荐输入大小(398,224) | [pp_humanseg_lite_portrait_ckpt](https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224.tar.gz) | [pp_humanseg_lite_portrait_inference](https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224_with_softmax.tar.gz) | 
#### 模型性能 | 模型名 |Input Size | FLOPS | Parameters | 计算耗时 | 模型大小 | |-|-|-|-|-|-| -| PPSeg-Lite | 398x224 | 266M | 137K | 23.49ms | 543Kb | -| PPSeg-Lite | 288x162 | 138M | 137K | 15.62ms | 543Kb | +| PP-HumanSeg-Lite | 398x224 | 266M | 137K | 23.49ms | 543Kb | +| PP-HumanSeg-Lite | 288x162 | 138M | 137K | 15.62ms | 543Kb | 测试环境: 使用Paddle.js converter优化图结构,部署于Web端,显卡型号AMD Radeon Pro 5300M 4 GB。 @@ -91,9 +90,9 @@ git clone https://github.com/PaddlePaddle/PaddleSeg ``` ## 快速体验 -以下所有命令均在`PaddleSeg/contrib/HumanSeg`目录下执行。 +以下所有命令均在`PaddleSeg/contrib/PP-HumanSeg`目录下执行。 ```shell -cd PaddleSeg/contrib/HumanSeg +cd PaddleSeg/contrib/PP-HumanSeg ``` ### 下载Inference Model @@ -104,7 +103,7 @@ python export_model/download_export_model.py ``` ### 下载测试数据 -我们提供了[supervise.ly](https://supervise.ly/)发布人像分割数据集**Supervisely Persons**, 从中随机抽取一小部分并转化成PaddleSeg可直接加载数据格式,同时提供了手机前置摄像头的人像测试视频`video_test.mp4`。通过运行以下代码进行快速下载: +我们提供了一些测试数据,从人像分割数据集 [Supervise.ly Person](https://app.supervise.ly/ecosystem/projects/persons) 中随机抽取一小部分并转化成PaddleSeg可直接加载数据格式,以下称为mini_supervisely,同时提供了手机前置摄像头的人像测试视频`video_test.mp4`。通过运行以下代码进行快速下载: ```bash python data/download_data.py @@ -162,7 +161,9 @@ python bg_replace.py \ Portrait模型适用于宽屏拍摄场景,竖屏效果会略差一些。 -## 训练评估预测 +## 训练评估预测演示 +如果上述大规模数据预训练的模型不能满足您的精度需要,可以基于上述模型在您的场景中进行Fine-tuning,以更好地适应您的使用场景。 + ### 下载预训练模型 执行以下脚本快速下载所有Checkpoint作为预训练模型 @@ -171,7 +172,7 @@ python pretrained_model/download_pretrained_model.py ``` ### 训练 -基于上述大规模数据预训练的模型,在抽取的部分[supervise.ly](https://supervise.ly/)数据集上进行Fine-tuning,以HRNet w18 small v1为例,训练命令如下: +演示基于上述模型进行Fine-tuning。我们使用抽取的mini_supervisely数据集作为示例数据集,以PP-HumanSeg-Mobile为例,训练命令如下: ```bash export CUDA_VISIBLE_DEVICES=0 # 设置1张可用的卡 # windows下请执行以下命令 @@ -220,12 +221,12 @@ python ../../export.py \ --without_argmax --with_softmax ``` -导出PPSeg-Lite模型: +导出PP-HumanSeg-Lite模型: ```shell python ../../export.py \ ---config ../../configs/ppseg_lite/ppseg_lite_export_398x224.yml \ ---save_dir export_model/ppseg_lite_portrait_398x224_with_softmax \ +--config ../../configs/pp_humanseg_lite/pp_humanseg_lite_export_398x224.yml \ +--save_dir export_model/pp_humanseg_lite_portrait_398x224_with_softmax \ --model_path pretrained_model/ppseg_lite_portrait_398x224/model.pdparams \ --without_argmax --with_softmax ``` diff --git a/contrib/HumanSeg/bg_replace.py b/contrib/PP-HumanSeg/bg_replace.py similarity index 98% rename from contrib/HumanSeg/bg_replace.py rename to contrib/PP-HumanSeg/bg_replace.py index a21cd6d9bd..5a858d1633 100644 --- a/contrib/HumanSeg/bg_replace.py +++ b/contrib/PP-HumanSeg/bg_replace.py @@ -25,7 +25,7 @@ def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg inference for video') + parser = argparse.ArgumentParser(description='PP-HumanSeg inference for video') parser.add_argument( "--config", dest="cfg", diff --git a/contrib/HumanSeg/configs/fcn_hrnetw18_small_v1_humanseg_192x192_mini_supervisely.yml b/contrib/PP-HumanSeg/configs/fcn_hrnetw18_small_v1_humanseg_192x192_mini_supervisely.yml similarity index 100% rename from contrib/HumanSeg/configs/fcn_hrnetw18_small_v1_humanseg_192x192_mini_supervisely.yml rename to contrib/PP-HumanSeg/configs/fcn_hrnetw18_small_v1_humanseg_192x192_mini_supervisely.yml diff --git a/contrib/HumanSeg/data/background.jpg b/contrib/PP-HumanSeg/data/background.jpg similarity index 100% rename from contrib/HumanSeg/data/background.jpg rename to contrib/PP-HumanSeg/data/background.jpg diff --git a/contrib/HumanSeg/data/download_data.py b/contrib/PP-HumanSeg/data/download_data.py similarity index 100% rename from 
contrib/HumanSeg/data/download_data.py rename to contrib/PP-HumanSeg/data/download_data.py diff --git a/contrib/HumanSeg/data/human_image.jpg b/contrib/PP-HumanSeg/data/human_image.jpg similarity index 100% rename from contrib/HumanSeg/data/human_image.jpg rename to contrib/PP-HumanSeg/data/human_image.jpg diff --git a/contrib/HumanSeg/datasets/humanseg.py b/contrib/PP-HumanSeg/datasets/humanseg.py similarity index 100% rename from contrib/HumanSeg/datasets/humanseg.py rename to contrib/PP-HumanSeg/datasets/humanseg.py diff --git a/contrib/HumanSeg/deploy/infer.py b/contrib/PP-HumanSeg/deploy/infer.py similarity index 100% rename from contrib/HumanSeg/deploy/infer.py rename to contrib/PP-HumanSeg/deploy/infer.py diff --git a/contrib/HumanSeg/export_model/download_export_model.py b/contrib/PP-HumanSeg/export_model/download_export_model.py similarity index 100% rename from contrib/HumanSeg/export_model/download_export_model.py rename to contrib/PP-HumanSeg/export_model/download_export_model.py diff --git a/contrib/HumanSeg/predict.py b/contrib/PP-HumanSeg/predict.py similarity index 100% rename from contrib/HumanSeg/predict.py rename to contrib/PP-HumanSeg/predict.py diff --git a/contrib/HumanSeg/pretrained_model/download_pretrained_model.py b/contrib/PP-HumanSeg/pretrained_model/download_pretrained_model.py similarity index 100% rename from contrib/HumanSeg/pretrained_model/download_pretrained_model.py rename to contrib/PP-HumanSeg/pretrained_model/download_pretrained_model.py diff --git a/contrib/EISeg/eiseg/model/modeling/__init__.py b/contrib/PP-HumanSeg/scripts/__init__.py similarity index 100% rename from contrib/EISeg/eiseg/model/modeling/__init__.py rename to contrib/PP-HumanSeg/scripts/__init__.py diff --git a/contrib/HumanSeg/scripts/optic_flow_process.py b/contrib/PP-HumanSeg/scripts/optic_flow_process.py similarity index 100% rename from contrib/HumanSeg/scripts/optic_flow_process.py rename to contrib/PP-HumanSeg/scripts/optic_flow_process.py diff --git a/contrib/HumanSeg/scripts/train.py b/contrib/PP-HumanSeg/scripts/train.py similarity index 100% rename from contrib/HumanSeg/scripts/train.py rename to contrib/PP-HumanSeg/scripts/train.py diff --git a/contrib/HumanSeg/train.py b/contrib/PP-HumanSeg/train.py similarity index 100% rename from contrib/HumanSeg/train.py rename to contrib/PP-HumanSeg/train.py diff --git a/contrib/HumanSeg/val.py b/contrib/PP-HumanSeg/val.py similarity index 100% rename from contrib/HumanSeg/val.py rename to contrib/PP-HumanSeg/val.py diff --git a/contrib/PanopticDeepLab/README.md b/contrib/PanopticDeepLab/README.md index d4faa744fa..e4e8ff6ba2 100644 --- a/contrib/PanopticDeepLab/README.md +++ b/contrib/PanopticDeepLab/README.md @@ -40,7 +40,7 @@ cd contrib/PanopticDeepLab ## 数据集准备 -将数据集放置于`data`目录下。 +将数据集放置于PaddleSeg/contrib/PanopticDeepLab目录下的`data`目录下。 ### Cityscapes @@ -117,7 +117,7 @@ python val.py --help ```shell export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 # 根据实际情况进行显卡数量的设置 python -m paddle.distributed.launch predict.py \ - --config configs/panoptic_deeplab/panoptic_deeplab_resnet50_os32_cityscapes_1025x513_120k.yml \ + --config configs/panoptic_deeplab/panoptic_deeplab_resnet50_os32_cityscapes_1025x513_bs8_90k_lr00005.yml \ --model_path output/iter_90000/model.pdparams \ --image_path data/cityscapes/leftImg8bit/val/ \ --save_dir ./output/result diff --git a/contrib/remote_sensing/README.md b/contrib/remote_sensing/README.md deleted file mode 100644 index 9ba9517c88..0000000000 --- a/contrib/remote_sensing/README.md +++ /dev/null @@ -1,13 +0,0 
@@ -# Deep High-Resolution Representation Learning for Visual Recognition - -## Reference -> Wang J, Sun K, Cheng T, et al. Deep high-resolution representation learning for visual recognition[J]. IEEE transactions on pattern analysis and machine intelligence, 2020. - -## Performance - -### 2020 CCF BDCI 遥感影像地块分割 - -| Model | Backbone | Resolution | Training Iters | Val mIoU | Val mIoU (ms+flip) | Test mIoU (ms+flip) | Links | -|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| -|FCN|HRNet_W48|256x256|160000|71.55%|72.42%|70.36%|[model](https://bj.bcebos.com/paddleseg/dygraph/ccf/fcn_hrnetw48_rs_256x256_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/ccf/fcn_hrnetw48_rs_256x256_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=3db72b204840a0d4a0e69f60d4b54b54)| -|OCRNet|HRNet_W48|256x256|160000|71.77%|72.68%|70.42%|[model](https://bj.bcebos.com/paddleseg/dygraph/ccf/ocr_hrnetw48_rs_256x256_160k/model.pdparams) \| [log](https://bj.bcebos.com/paddleseg/dygraph/ccf/ocr_hrnetw48_rs_256x256_160k/train.log) \| [vdl](https://paddlepaddle.org.cn/paddle/visualdl/service/app?id=690745a3020eab5c9123fa78f9c020f9)| diff --git a/contrib/remote_sensing/fcn_hrnetw48_ccf_256x256_160k.yml b/contrib/remote_sensing/fcn_hrnetw48_ccf_256x256_160k.yml deleted file mode 100644 index 7746044713..0000000000 --- a/contrib/remote_sensing/fcn_hrnetw48_ccf_256x256_160k.yml +++ /dev/null @@ -1,60 +0,0 @@ -batch_size: 16 -iters: 160000 - -train_dataset: - type: Dataset - dataset_root: data/rs_data - num_classes: 7 - train_path: data/rs_data/train_list.txt - transforms: - - type: ResizeStepScaling - min_scale_factor: 0.75 - max_scale_factor: 1.5 - scale_step_size: 0.25 - - type: RandomHorizontalFlip - - type: RandomVerticalFlip - - type: RandomRotation - max_rotation: 30 - - type: RandomDistort - brightness_range: 0.2 - contrast_range: 0.2 - saturation_range: 0.2 - - type: RandomPaddingCrop - crop_size: [256, 256] - - type: Normalize - mode: train - -val_dataset: - type: Dataset - dataset_root: data/rs_data - num_classes: 7 - val_path: data/rs_data/val_list.txt - transforms: - - type: Normalize - mode: val - - -optimizer: - type: sgd - momentum: 0.9 - weight_decay: 4.0e-5 - -lr_scheduler: - type: PolynomialDecay - learning_rate: 0.01 - end_lr: 0 - power: 0.9 - -loss: - types: - - type: CrossEntropyLoss - coef: [1] - -model: - type: FCN - backbone: - type: HRNet_W48 - align_corners: False - pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w48_ssld.tar.gz - backbone_indices: [-1] - pretrained: Null diff --git a/contrib/remote_sensing/ocrnet_hrnetw48_ccf_256x256_80k.yml b/contrib/remote_sensing/ocrnet_hrnetw48_ccf_256x256_80k.yml deleted file mode 100644 index 2e241ad6f3..0000000000 --- a/contrib/remote_sensing/ocrnet_hrnetw48_ccf_256x256_80k.yml +++ /dev/null @@ -1,20 +0,0 @@ -_base_: 'fcn_hrnetw48_ccf_256x256_160k.yml' - -batch_size: 8 -iters: 80000 - -model: - type: OCRNet - pretrained: https://bj.bcebos.com/paddleseg/dygraph/ccf/fcn_hrnetw48_rs_256x256_160k/model.pdparams - -learning_rate: - value: 0.001 - decay: - type: poly - power: 0.9 - end_lr: 0.0 - -loss: - types: - - type: CrossEntropyLoss - coef: [1, 0.4] diff --git a/deploy/lite/README.md b/deploy/lite/README.md index 08bbc26ad2..ba4a9e06af 100644 --- a/deploy/lite/README.md +++ b/deploy/lite/README.md @@ -53,7 +53,7 @@ Paddle-Lite的编译目前支持Docker,Linux和Mac OS开发环境,建议使 
准备好上述文件,即可参考[java_api](https://paddle-lite.readthedocs.io/zh/release-v2.8/api_reference/java_api_doc.html)在安卓端进行推理。具体使用预测库的方法可参考[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo)中更新预测库部分的文档。 ### 3.2 模型导出 -此demo的人像分割模型为基于HRNet w18 small v1的humanseg模型([下载链接](https://bj.bcebos.com/paddleseg/deploy/lite/android/hrnet_w18_small.tar.gz)),更多的分割模型导出可参考:[模型导出](../../docs/model_export.md) +此demo的人像分割模型为基于HRNet w18 small v1的PP-HumanSeg模型([下载链接](https://bj.bcebos.com/paddleseg/deploy/lite/android/hrnet_w18_small.tar.gz)),更多的分割模型导出可参考:[模型导出](../../docs/model_export.md) ### 3.3 模型转换 diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 9a7ee33e6c..9e6f830253 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -260,6 +260,16 @@ def parse_args(): help='When `--benchmark` is True, the specified model name is displayed.' ) + parser.add_argument( + '--use_cpu', + dest='use_cpu', + help='Whether to use X86 CPU for inference. Uses GPU in default.', + action='store_true') + parser.add_argument( + '--use_mkldnn', + dest='use_mkldnn', + help='Whether to use MKLDNN to accelerate prediction.', + action='store_true') parser.add_argument( '--with_argmax', dest='with_argmax', diff --git a/docs/apis/README.md b/docs/apis/README.md index d42ebd243f..88d8d1504a 100644 --- a/docs/apis/README.md +++ b/docs/apis/README.md @@ -1,11 +1,18 @@ -## 数据处理与增强 -paddleseg.transforms -paddleseg.datasets -## 视觉模型集 -paddleseg.models -## 骨干网络 -paddleseg.models.backbone -## 训练、评估和预测 -paddleseg.core -## 标准归一化 -paddleseg.cvlibs +English | [简体中文](README_CN.md) +## Data Transformation (Data Augmentation) +[paddleseg.transforms](./transforms/transforms.md) + +## Dataset Processing +[paddleseg.datasets](./datasets/datasets.md) + +## Semantic Segmentation Model Set +[paddleseg.models](./models/models.md) + +## Backbone Networks +[paddleseg.models.backbone](./backbones/backbones.md) + +## Training,Evaluating and Predicting +[paddleseg.core](./core/core.md) + +## Computer Vision Library +[paddleseg.cvlibs](./cvlibs/cvlibs.md) \ No newline at end of file diff --git a/docs/apis/README_CN.md b/docs/apis/README_CN.md new file mode 100644 index 0000000000..63ff6747d8 --- /dev/null +++ b/docs/apis/README_CN.md @@ -0,0 +1,18 @@ +简体中文 | [English](README.md) +## 数据变换(数据增强) +[paddleseg.transforms](./transforms/transforms_cn.md) + +## 数据集处理 +[paddleseg.datasets](./datasets/datasets_cn.md) + +## 语义分割模型集 +[paddleseg.models](./models/models_cn.md) + +## 骨干网络 +[paddleseg.models.backbone](./backbones/backbones_cn.md) + +## 训练、评估和预测 +[paddleseg.core](./core/core_cn.md) + +## 计算机视觉库 +[paddleseg.cvlibs](./cvlibs/cvlibs_cn.md) diff --git a/docs/apis/transforms/datasets.md b/docs/apis/datasets/datasets.md similarity index 99% rename from docs/apis/transforms/datasets.md rename to docs/apis/datasets/datasets.md index dbd8058884..a3d672b8aa 100644 --- a/docs/apis/transforms/datasets.md +++ b/docs/apis/datasets/datasets.md @@ -1,3 +1,4 @@ +English | [简体中文](datasets_cn.md) # paddleseg.datasets - [Custom Dataset](#custom-dataset) - [Cityscapes](#Cityscapes) diff --git a/docs/apis/transforms/datasets_cn.md b/docs/apis/datasets/datasets_cn.md similarity index 99% rename from docs/apis/transforms/datasets_cn.md rename to docs/apis/datasets/datasets_cn.md index d0c461b745..df97e1b964 100644 --- a/docs/apis/transforms/datasets_cn.md +++ b/docs/apis/datasets/datasets_cn.md @@ -1,3 +1,4 @@ +简体中文 | [English](datasets.md) # paddleseg.datasets - [Custom Dataset](#custom-dataset) - [Cityscapes](#Cityscapes) diff --git 
a/docs/apis/transforms/transforms.md b/docs/apis/transforms/transforms.md
index efc9fa5f4c..28940d36c0 100644
--- a/docs/apis/transforms/transforms.md
+++ b/docs/apis/transforms/transforms.md
@@ -1,3 +1,4 @@
+English | [简体中文](transforms_cn.md)
# [paddleseg.transforms](../../../paddleseg/transforms/transforms.py)
## [Compose](../../../paddleseg/transforms/transforms.py)
@@ -81,7 +82,7 @@ class paddleseg.transforms.ResizeStepScaling(min_scale_factor = 0.75,
Scale an image proportionally within a range.
### Args
-* **min_scale_factor**** (float, optional): The minimum scale. Default: 0.75.
+* **min_scale_factor** (float, optional): The minimum scale. Default: 0.75.
* **max_scale_factor** (float, optional): The maximum scale. Default: 1.25.
* **scale_step_size** (float, optional): The scale interval. Default: 0.25.
diff --git a/docs/apis/transforms/transforms_cn.md b/docs/apis/transforms/transforms_cn.md
index 79762c8ce3..eef1e04372 100644
--- a/docs/apis/transforms/transforms_cn.md
+++ b/docs/apis/transforms/transforms_cn.md
@@ -1,3 +1,4 @@
+简体中文 | [English](transforms.md)
# [paddleseg.transforms](../../../paddleseg/transforms/transforms.py)
## [Compose](../../../paddleseg/transforms/transforms.py)
diff --git a/docs/data/custom/data_prepare.md b/docs/data/custom/data_prepare.md
index 7937656114..66261c25df 100644
--- a/docs/data/custom/data_prepare.md
+++ b/docs/data/custom/data_prepare.md
@@ -1,9 +1,89 @@
-# 自定义数据集
+English|[简体中文](data_prepare_cn.md)
+# Custom Dataset
-如果您需要使用自定义数据集进行训练,请按照以下步骤准备数据.
+## 1、How to Use Datasets
-1.推荐整理成如下结构
+PaddleSeg locates images by reading their paths from text files, so the image paths need to be written into four files: `train.txt`, `val.txt`, `test.txt` and `labels.txt`.
+Each line of `train.txt`, `val.txt` and `test.txt` has two columns separated by a space: the first column is the path of the image file relative to the dataset root, and the second column is the path of the corresponding annotation file relative to the dataset root, as shown below:
+```
+images/xxx1.jpg (xx1.png) annotations/xxx1.png
+images/xxx2.jpg (xx2.png) annotations/xxx2.png
+...
+```
+`labels.txt`: each line contains a single category name, and its line number is the id of that category (line numbers start from 0), as shown below:
+```
+labelA
+labelB
+...
+```
+
+## 2、Split Custom Dataset
+
+Training a neural network model usually requires the data to be split into training, validation and test sets. If you are using a custom dataset, PaddleSeg supports splitting it by running a script. If your dataset has already been split into these three parts, you can skip this step.
+
+### 2.1 Original Image Requirements
+The size of the original image data should be (h, w, channel), where h and w are the height and width of the image, and channel is the number of channels of the image.
+
+### 2.2 Annotation Requirements
+The annotation image must be a single-channel image in PNG format. Each pixel value is the corresponding category id, and the category ids must start from 0 and increase consecutively.
+For example, 0, 1, 2, 3 indicate 4 categories, and at most 256 categories are supported. A specific pixel value (255 by default) can be reserved to mark pixels that do not participate in training and evaluation.
+
+### 2.3 Split Custom Dataset and Generate File Lists
+
+For data that has not been divided into training, validation and test sets, PaddleSeg provides a script that performs the split and generates the file lists.
+
+#### Use the script to randomly split the custom dataset proportionally and generate file lists
+The data file structure is as follows:
+```
+./dataset/ # Dataset root directory
+|--images # Original image directory
+|  |--xxx1.jpg (xx1.png)
+|  |--...
+|  └--...
+|
+|--annotations # Annotation image directory
+|  |--xxx1.png
+|  |--...
+|  └--...
+```
+
+The folder and file names above can be defined as needed.
+
+The command is as follows, and specific behaviors can be enabled through different flags.
+```
+python tools/split_dataset_list.py ${FLAGS}
+```
+Parameters:
+- dataset_root: Dataset root directory
+- images_dir_name: Original image directory
+- labels_dir_name: Annotation image directory
+
+FLAGS:
+
+|FLAG|Meaning|Default|Number of parameters|
+|-|-|-|-|
+|--split|Dataset split ratio|0.7 0.3 0|3|
+|--separator|File list separator|"|"|1|
+|--format|Data format of images and label sets|"jpg" "png"|2|
+|--label_class|Label categories|'\_\_background\_\_' '\_\_foreground\_\_'|several|
+|--postfix|Filter images and label sets by whether the main file name (without extension) contains the specified postfix|"" ""(2 empty strings)|2|
+
+
+After running, `train.txt`, `val.txt`, `test.txt` and `labels.txt` will be generated in the root directory of the dataset.
+
+**Note:** Requirements for generating the file lists: either the numbers of original images and annotation images are the same, or there are only original images without annotation images. If the dataset lacks annotation images, file lists without separators and annotation image paths will be generated.
+
+#### Example
+```
+python tools/split_dataset_list.py images annotations --split 0.6 0.2 0.2 --format jpg png
+```
+
+
+
+## 3. Dataset file organization
+
+* If you need to use a custom dataset for training, it is recommended to organize it into the following structure:
    custom_dataset
    |
    |--images
    |  |--image1.jpg
    |  |--image2.jpg
    |  |--...
    |
    |--labels
    |  |--label1.png
    |  |--label2.png
    |  |--...
    |
    |--train.txt
    |
    |--val.txt
    |
    |--test.txt
-其中train.txt和val.txt的内容如下所示:
+The contents of train.txt and val.txt are as follows:
    images/image1.jpg labels/label1.png
    images/image2.jpg labels/label2.png
    ...
-2.标注图像的标签从0,1依次取值,不可间隔。若有需要忽略的像素,则按255进行标注。
+If you already have a split dataset, you can generate the file lists by executing the following script:
+```
+# Generate file lists; the separator is a space, and the data format of both images and label sets is png
+python tools/create_dataset_list.py --separator " " --format png png
+```
+```
+# Generate file lists. The image and label folders are named img and gt, the training and validation folders are named training and validation, and no test set list is generated.
+python tools/create_dataset_list.py \
+    --folder img gt --second_folder training validation
+```
+**Note:** The custom dataset directory must be specified, and the FLAGS can be set as needed. There is no need to specify `--type`.
+After running, `train.txt`, `val.txt`, `test.txt` and `labels.txt` will be generated in the root directory of the dataset. PaddleSeg locates images by reading the paths in these text files.
+
+
+
+* The labels in annotation images must take consecutive values starting from 0 (0, 1, 2, ...), with no gaps. Pixels that need to be ignored are labeled as 255.
-可按如下方式对自定义数据集进行配置: +The custom dataset can be configured as follows: ```yaml train_dataset: type: Dataset @@ -48,4 +143,4 @@ train_dataset: - type: Normalize mode: train ``` -请注意**数据集路径和训练文件**的存放位置,按照代码中的dataset_root和train_path示例方式存放。 +Please pay attention to the storage location of **dataset path and training file**, according to the example of dataset_root and train_path in the code. diff --git a/docs/data/custom/data_prepare_cn.md b/docs/data/custom/data_prepare_cn.md new file mode 100644 index 0000000000..1d646ec8dc --- /dev/null +++ b/docs/data/custom/data_prepare_cn.md @@ -0,0 +1,143 @@ +简体中文|[English](data_prepare.md) +# 自定义数据集 + +## 1、如何使用数据集 +我们希望将图像的路径写入到`train.txt`,`val.txt`,`test.txt`和`labels.txt`三个文件夹中,因为PaddleSeg是通过读取这些文本文件来定位图像路径的。 +`train.txt`,`val.txt`和`test.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示: +``` +images/xxx1.jpg (xx1.png) annotations/xxx1.png +images/xxx2.jpg (xx2.png) annotations/xxx2.png +... +``` +`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示: +``` +labelA +labelB +... +``` + +## 2、切分自定义数据集 + +我们都知道,神经网络模型的训练过程通常要划分为训练集、验证集、测试集。如果你使用的是自定义数据集,PaddleSeg支持通过运行脚本的方式将数据集进行切分。如果你的数据集已经划分为以上三种,你可以跳过本步骤。 + +### 2.1 原图像要求 +原图像数据的尺寸应为(h, w, channel),其中h, w为图像的高和宽,channel为图像的通道数。 + +### 2.2 标注图要求 +标注图像必须为单通道图像,标注图应为`png`格式。像素值即为对应的类别,像素标注类别需要从0开始递增。 +例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。 + + +### 2.3 自定义数据集切分与文件列表生成 + +对于未划分为训练集、验证集、测试集的全部数据,PaddleSeg提供了生成切分数据并生成文件列表的脚本。 + +#### 使用脚本对自定义数据集按比例随机切分,并生成文件列表 +数据文件结构如下: +``` +./dataset/ # 数据集根目录 +|--images # 原图目录 +| |--xxx1.jpg (xx1.png) +| |--... +| └--... +| +|--annotations # 标注图目录 +| |--xxx1.png +| |--... +| └--... +``` +其中,相应的文件名可根据需要自行定义。 + +使用命令如下,支持通过不同的Flags来开启特定功能。 +``` +python tools/split_dataset_list.py ${FLAGS} +``` +参数说明: +- dataset_root: 数据集根目录 +- images_dir_name: 原图目录名 +- labels_dir_name: 标注图目录名 + +FLAGS说明: + +|FLAG|含义|默认值|参数数目| +|-|-|-|-| +|--split|数据集切分比例|0.7 0.3 0|3| +|--separator|文件列表分隔符|" "|1| +|--format|图片和标签集的数据格式|"jpg" "png"|2| +|--label_class|标注类别|'\_\_background\_\_' '\_\_foreground\_\_'|若干| +|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| + +运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`. + +**注:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,将生成不含分隔符和标注图片路径的文件列表。 + +#### 使用示例 +``` +python tools/split_dataset_list.py images annotations --split 0.6 0.2 0.2 --format jpg png +``` + + + +## 3.数据集文件整理 + +* 如果你需要使用自定义数据集进行训练,推荐整理成如下结构: + custom_dataset + | + |--images + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--labels + | |--label1.png + | |--label2.png + | |--... + | + |--train.txt + | + |--val.txt + | + |--test.txt + +其中train.txt和val.txt的内容如下所示: + + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... 
+ +如果你只有划分好的数据集,可以通过执行以下脚本生成文件列表: +``` +# 生成文件列表,其分隔符为空格,图片和标签集的数据格式都为png +python tools/create_dataset_list.py --separator " " --format png png +``` +``` +# 生成文件列表,其图片和标签集的文件夹名为img和gt,训练和验证集的文件夹名为training和validation,不生成测试集列表 +python tools/create_dataset_list.py \ + --folder img gt --second_folder training validation +``` +**注:** 必须指定自定义数据集目录,可以按需要设定FLAG。无需指定`--type`。 +运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`。PaddleSeg是通过读取这些文本文件来定位图像路径的。 + + + +* 标注图像的标签从0,1依次取值,不可间隔。若有需要忽略的像素,则按255进行标注。 + +可按如下方式对自定义数据集进行配置: +```yaml +train_dataset: + type: Dataset + dataset_root: custom_dataset + train_path: custom_dataset/train.txt + num_classes: 2 + transforms: + - type: ResizeStepScaling + min_scale_factor: 0.5 + max_scale_factor: 2.0 + scale_step_size: 0.25 + - type: RandomPaddingCrop + crop_size: [512, 512] + - type: RandomHorizontalFlip + - type: Normalize + mode: train +``` +请注意**数据集路径和训练文件**的存放位置,按照代码中的dataset_root和train_path示例方式存放。 diff --git a/docs/data/marker/marker.md b/docs/data/marker/marker.md index b97fe248a9..e95fc10542 100644 --- a/docs/data/marker/marker.md +++ b/docs/data/marker/marker.md @@ -1,100 +1,210 @@ -# 数据格式说明 +English|[简体中文](marker_cn.md) +# Preparation of Annotation Data -## 数据标注 +## 1、Pre-knowledge -### 标注协议 -PaddleSeg采用单通道的标注图片,每一种像素值代表一种类别,像素标注类别需要从0开始递增,例如0,1,2,3表示有4种类别。 +### 1.1 Annotation Protocal +PaddleSeg uses a single-channel annotated image, and each pixel value represents a category, and the pixel label category needs to increase from 0. For example, 0, 1, 2, 3 indicate that there are 4 categories. -**NOTE:** 标注图像请使用PNG无损压缩格式的图片。标注类别最多为256类。 +**NOTE:** Please use PNG lossless compression format for annotated images. The maximum number of label categories is 256. -### 灰度标注vs伪彩色标注 -一般的分割库使用单通道灰度图作为标注图片,往往显示出来是全黑的效果。灰度标注图的弊端: -1. 对图像标注后,无法直接观察标注是否正确。 -2. 模型测试过程无法直接判断分割的实际效果。 +### 1.2 Grayscale Annotation VS Pseudo-color Annotation +The general segmentation library uses a single-channel grayscale image as the annotated image, and it often shows a completely black effect. Disadvantages of gray scale annotated map: +1. After annotating an image, it is impossible to directly observe whether the annotation is correct. +2. The actual effect of segmentation cannot be directly judged during the model testing process. -**PaddleSeg支持伪彩色图作为标注图片,在原来的单通道图片基础上,注入调色板。在基本不增加图片大小的基础上,却可以显示出彩色的效果。** +**PaddleSeg supports pseudo-color images as annotated images, and injects palettes on the basis of the original single-channel images. On the basis of basically not increasing the size of the picture, it can show a colorful effect.** -同时PaddleSeg也兼容灰度图标注,用户原来的灰度数据集可以不做修改,直接使用。 +At the same time, PaddleSeg is also compatible with gray-scale icon annotations. The user's original gray-scale dataset can be used directly without modification. ![](../image/image-11.png) -### 灰度标注转换为伪彩色标注 -如果用户需要转换成伪彩色标注图,可使用我们的转换工具。适用于以下两种常见的情况: -1. 如果您希望将指定目录下的所有灰度标注图转换为伪彩色标注图,则执行以下命令,指定灰度标注所在的目录即可。 +### 1.3 Convert grayscale annotations to pseudo-color annotations +If users need to convert to pseudo-color annotation maps, they can use our conversion tool. Applies to the following two common situations: +1. If you want to convert all grayscale annotation images in a specified directory to pseudo-color annotation images, execute the following command to specify the directory where the grayscale annotations are located. 
```buildoutcfg -python pdseg/tools/gray2pseudo_color.py +python tools/gray2pseudo_color.py ``` -|参数|用途| +|Parameter|Effection| |-|-| -|dir_or_file|指定灰度标注所在目录| -|output_dir|彩色标注图片的输出目录| +|dir_or_file|Specify the directory where gray scale labels are located| +|output_dir|Output directory of color-labeled pictures| -2. 如果您仅希望将指定数据集中的部分灰度标注图转换为伪彩色标注图,则执行以下命令,需要已有文件列表,按列表读取指定图片。 +2. If you only want to convert part of the gray scale annotated image in the specified dataset to pseudo-color annotated image, execute the following command, you need an existing file list, and read the specified image according to the list. ```buildoutcfg -python pdseg/tools/gray2pseudo_color.py --dataset_dir --file_separator +python tools/gray2pseudo_color.py --dataset_dir --file_separator ``` - -|参数|用途| +|Parameter|Effection| |-|-| -|dir_or_file|指定文件列表路径| -|output_dir|彩色标注图片的输出目录| -|--dataset_dir|数据集所在根目录| -|--file_separator|文件列表分隔符| +|dir_or_file|Specify the directory where gray scale labels are located| +|output_dir|Output directory of color-labeled pictures| +|--dataset_dir|The root directory where the dataset is located| +|--file_separator|File list separator| + + +### 1.4 How PaddleSeg uses datasets +We want to write the path of the image to the three folders `train.txt`, `val.txt`, `test.txt` and `labels.txt`, because PaddleSeg locates the image by reading these text files Path. + +The texts of `train.txt`, `val.txt` and `test.txt` are divided into two columns with spaces as separators. The first column is the relative path of the image file relative to the dataset, and the second column is the relative path of the image file The relative path of the dataset. As follows: + +``` +images/xxx1.jpg annotations/xxx1.png +images/xxx2.jpg annotations/xxx2.png +... +``` +`labels.txt`: Each row has a separate category, and the corresponding row number is the id corresponding to the category (the row number starts from 0), as shown below: +``` +labelA +labelB +... +``` + + +## 2、Annotate custom datasets +If you want to use a custom dataset, you need to collect images for training, evaluation, and testing in advance, and then use the data annotation tool to complete the data annotation. If you want to use ready-made datasets such as Cityscapes and Pascal VOC, you can skip this step. + +PaddleSeg already supports 2 kinds of labeling tools: `LabelMe`, and `EISeg`. The annotation tutorial is as follows: + +- [LabelMe Tutorial](../transform/transform_cn.md) +- [EISeg Tutorial](../../../contrib/EISeg/README.md) + +After annotating with the above tools, please store all annotated images in the annotations folder, and then proceed to the next step. + + +## 3、Split a custom dataset + +We all know that the training process of neural network models is usually divided into training set, validation set, and test set. If you are using a custom dataset, PaddleSeg supports splitting the dataset by running scripts. If you want to use ready-made datasets such as Cityscapes and Pascal VOC, you can skip this step. + +### 3.1 Original image requirements +The size of the original image data should be (h, w, channel), where h, w are the height and width of the image, and channel is the number of channels of the image. + +### 3.2 Annotation image requirements +The annotated image must be a single-channel image, the pixel value is the corresponding category, and the pixel annotated category needs to increase from 0. +For example, 0, 1, 2, 3 means that there are 4 categories, and the maximum number of labeled categories is 256. 
Among them, you can specify a specific pixel value to indicate that the pixel of that value does not participate in training and evaluation (the default is 255). -### 标注教程 -用户需预先采集好用于训练、评估和测试的图片,然后使用数据标注工具完成数据标注。 -PddleSeg已支持2种标注工具:LabelMe、精灵数据标注工具。标注教程如下: +### 3.3 Custom dataset segmentation and file list generation -- [LabelMe标注教程](../transform/transform.md) -- [精灵数据标注工具教程](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/jingling2seg.md) +For all data that is not divided into training set, validation set, and test set, PaddleSeg provides a script to generate segmented data and generate a file list. +If your dataset has been segmented like Cityscapes, Pascal VOC, etc., please skip to section 4. Otherwise, please refer to the following tutorials: -## 文件列表 +### Use scripts to randomly split the custom dataset proportionally and generate a file list +The data file structure is as follows: +``` +./dataset/ # Dataset root directory +|--images # Original image catalog +| |--xxx1.jpg +| |--... +| └--... +| +|--annotations # Annotated image catalog +| |--xxx1.png +| |--... +| └--... +``` -### 文件列表规范 -PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试集。在训练、评估、可视化过程前必须准备好相应的文件列表。 +Among them, the corresponding file name can be defined according to needs. -文件列表组织形式如下 +The commands used are as follows, which supports enabling specific functions through different Flags. ``` -原始图片路径 [SEP] 标注图片路径 +python tools/split_dataset_list.py ${FLAGS} ``` +Parameters: +- dataset_root: Dataset root directory +- images_dir_name: Original image catalog +- labels_dir_name: Annotated image catalog + +FLAGS: + +|FLAG|Meaning|Default|Parameter numbers| +|-|-|-|-| +|--split|Dataset segmentation ratio|0.7 0.3 0|3| +|--separator|File list separator|"|"|1| +|--format|Data format of pictures and label sets|"jpg" "png"|2| +|--label_class|Label category|'\_\_background\_\_' '\_\_foreground\_\_'|several| +|--postfix|Filter pictures and label sets according to whether the main file name (without extension) contains the specified suffix|"" ""(2 null characters)|2| -其中`[SEP]`是文件路径分割符,可以在`DATASET.SEPARATOR`配置项中修改, 默认为空格。文件列表的路径以数据集根目录作为相对路径起始点,`DATASET.DATA_DIR`即为数据集根目录。 -如下图所示,左边为原图的图片路径,右边为图片对应的标注路径。 -![](../image/file_list.png) +After running, `train.txt`, `val.txt`, `test.txt` and `labels.txt` will be generated in the root directory of the dataset. + +**Note:** Requirements for generating the file list: either the original image and the number of annotated images are the same, or there is only the original image without annotated images. If the dataset lacks annotated images, a file list without separators and annotated image paths will be generated. + +#### Example +``` +python tools/split_dataset_list.py images annotations --split 0.6 0.2 0.2 --format jpg png +``` + -**注意事项** -* 务必保证分隔符在文件列表中每行只存在一次, 如文件名中存在空格,请使用"|"等文件名不可用字符进行切分 +## 4、Dataset file organization -* 文件列表请使用**UTF-8**格式保存, PaddleSeg默认使用UTF-8编码读取file_list文件 +PaddleSeg uses a common file list method to organize training set, validation set and test set. The corresponding file list must be prepared before the training, evaluation, and visualization process. -若数据集缺少标注图片,则文件列表不用包含分隔符和标注图片路径,如下图所示。 +It is recommended to organize it into the following structure: -![](../image/file_list2.png) + custom_dataset + | + |--images + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--labels + | |--label1.png + | |--label2.png + | |--... 
+ | + |--train.txt + | + |--val.txt + | + |--test.txt -**注意事项** +### 4.1 File List Specification(Training,Evaluating) -此时的文件列表仅可在调用`pdseg/vis.py`进行可视化展示时使用, -即仅可在`DATASET.TEST_FILE_LIST`和`DATASET.VIS_FILE_LIST`配置项中使用。 -不可在`DATASET.TRAIN_FILE_LIST`和`DATASET.VAL_FILE_LIST`配置项中使用。 +- During training and evaluating, annotated images are required. +- That is, the contents of `train.txt` and `val.txt` are as follows: + ``` + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... + ``` -**符合规范的文件列表是什么样的呢?** +Among them, `image1.jpg` and `label1.png` are the original image and its corresponding annotated image, respectively. For the content specification in `test.txt`, please refer to [Section 4.2](#4.2-File-List-Specification-(Predicting)). -请参考[目录](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/cityscapes_demo)。 +**NOTE** -### 数据集目录结构整理 +* Make sure that the separator exists only once per line in the file list. If there are spaces in the file name, please use "|" and other unusable characters in the file name to split. -如果用户想要生成数据集的文件列表,需要整理成如下的目录结构(类似于Cityscapes数据集): +* Please save the file list in **UTF-8** format, PaddleSeg uses UTF-8 encoding to read file_list files by default. + +* You need to ensure that the separator of the file list is consistent with your Dataset class. The default separator is a `space`. + +### 4.2 File List Specification (Predicting) +- During predicting, the model uses only the original image. + +- That is, the content of `test.txt` is as follows: + ``` + images/image1.jpg + images/image2.jpg + ... + ``` + +- When calling `predict.py` for visual display, annotated images can be included in the file list. During predicting, the model will automatically ignore the annotated images given in the file list. Therefore, you can make predictions on the training and validatsion datasets without modifying the contents of the `train.txt` and `val.txt` files mentioned in +[Section 4.1](#4.1-File-List-Specification(Training,Evaluating)). + + +### 4.3 Organize the dataset directory structure + +If the user wants to generate a file list of the dataset, it needs to be organized into the following directory structure (similar to the Cityscapes dataset). You can divide it manually, or refer to the method of automatic segmentation using scripts in Section 3. ``` -./dataset/ # 数据集根目录 -├── annotations # 标注目录 +./dataset/ # Dataset root directory +├── annotations # Annotated image catalog │   ├── test │   │   ├── ... │   │   └── ... @@ -104,7 +214,7 @@ PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试 │   └── val │   ├── ... │   └── ... -└── images # 原图目录 +└── images # Original image catalog ├── test │   ├── ... │   └── ... @@ -114,63 +224,66 @@ PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试 └── val ├── ... └── ... -Note:以上目录名可任意 +Note:The above directory name can be any ``` -### 文件列表生成 -PaddleSeg提供了生成文件列表的使用脚本,可适用于自定义数据集或cityscapes数据集,并支持通过不同的Flags来开启特定功能。 +### 4.4 Generate file list +PaddleSeg provides a script for generating file lists, which can be applied to custom datasets or cityscapes datasets, and supports different Flags to enable specific functions. ``` -python pdseg/tools/create_dataset_list.py ${FLAGS} +python tools/create_dataset_list.py ${FLAGS} ``` -运行后将在数据集根目录下生成训练/验证/测试集的文件列表(文件主名与`--second_folder`一致,扩展名为`.txt`)。 +After running, a file list of the training/validation/test set will be generated in the root directory of the dataset (the main name of the file is the same as `--second_folder`, and the extension is `.txt`). 
-**Note:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,仍可自动生成不含分隔符和标注图片路径的文件列表。 +**Note:** Requirements for generating the file list: either the original image and the number of annotated images are the same, or there is only the original image without annotated images. If the dataset lacks annotated images, a file list without separators and annotated image paths can still be automatically generated. -#### 命令行FLAGS列表 +#### FLAGS list -|FLAG|用途|默认值|参数数目| +|FLAG|Effection|Default|Parameter numbers| |-|-|-|-| -|--type|指定数据集类型,`cityscapes`或`自定义`|`自定义`|1| -|--separator|文件列表分隔符|"|"|1| -|--folder|图片和标签集的文件夹名|"images" "annotations"|2| -|--second_folder|训练/验证/测试集的文件夹名|"train" "val" "test"|若干| -|--format|图片和标签集的数据格式|"jpg" "png"|2| -|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| +|--type|Specify the dataset type, `cityscapes` or `custom`|`custom`|1| +|--separator|File list separator|"|"|1| +|--folder|Folder name for pictures and label sets|"images" "annotations"|2| +|--second_folder|The folder name of the training/validation/test set|"train" "val" "test"|several| +|--format|Data format of pictures and label sets|"jpg" "png"|2| +|--postfix|Filter pictures and label sets according to whether the main file name (without extension) contains the specified suffix|"" ""(2 null characters)|2| -#### 使用示例 -- **对于自定义数据集** +#### Example +- **For custom datasets** -若您已经按上述说明整理好了数据集目录结构,可以运行下面的命令生成文件列表。 +If you have organized the dataset directory structure according to the above instructions, you can run the following command to generate a file list. ``` -# 生成文件列表,其分隔符为空格,图片和标签集的数据格式都为png -python pdseg/tools/create_dataset_list.py --separator " " --format png png +# Generate a file list, the separator is a space, and the data format of the picture and the label set is png +python tools/create_dataset_list.py --separator " " --format jpg png ``` ``` -# 生成文件列表,其图片和标签集的文件夹名为img和gt,训练和验证集的文件夹名为training和validation,不生成测试集列表 -python pdseg/tools/create_dataset_list.py \ +# Generate a list of files. The folders for pictures and tag sets are named img and gt, and the folders for training and validation sets are named training and validation. No test set list is generated. +python tools/create_dataset_list.py \ --folder img gt --second_folder training validation ``` -**Note:** 必须指定自定义数据集目录,可以按需要设定FLAG。无需指定`--type`。 +**Note:** A custom dataset directory must be specified, and FLAG can be set as needed. There is no need to specify `--type`. -- **对于cityscapes数据集** +- **For the cityscapes dataset** -若您使用的是cityscapes数据集,可以运行下面的命令生成文件列表。 +If you are using the cityscapes dataset, you can run the following command to generate a file list. ``` -# 生成cityscapes文件列表,其分隔符为逗号 -python pdseg/tools/create_dataset_list.py --type cityscapes --separator "," +# Generate a list of cityscapes files with a comma as the separator +python tools/create_dataset_list.py --type cityscapes --separator "," ``` **Note:** -必须指定cityscapes数据集目录,`--type`必须为`cityscapes`。 +The cityscapes dataset directory must be specified, and `--type` must be `cityscapes`. -在cityscapes类型下,部分FLAG将被重新设定,无需手动指定,具体如下: +Under the cityscapes type, part of the FLAG will be reset, no need to specify manually, as follows: -|FLAG|固定值| +|FLAG|Fixed value| |-|-| |--folder|"leftImg8bit" "gtFine"| -|--format|"png" "png"| +|--format|"jpg" "png"| |--postfix|"_leftImg8bit" "_gtFine_labelTrainIds"| -其余FLAG可以按需要设定。 +The remaining FLAG can be set as required. 
+ + +After running, `train.txt`, `val.txt`, `test.txt` and `labels.txt` will be generated in the root directory of the dataset. PaddleSeg locates the image path by reading these text files. diff --git a/docs/data/marker/marker_c.md b/docs/data/marker/marker_c.md deleted file mode 100644 index 6e1a69fa45..0000000000 --- a/docs/data/marker/marker_c.md +++ /dev/null @@ -1,175 +0,0 @@ -# 数据格式说明 - -## 数据标注 - -### 标注协议 -PaddleSeg采用单通道的标注图片,每一种像素值代表一种类别,像素标注类别需要从0开始递增,例如0,1,2,3表示有4种类别。 - -**NOTE:** 标注图像请使用PNG无损压缩格式的图片。标注类别最多为256类。 - -### 灰度标注vs伪彩色标注 -一般的分割库使用单通道灰度图作为标注图片,往往显示出来是全黑的效果。灰度标注图的弊端: -1. 对图像标注后,无法直接观察标注是否正确。 -2. 模型测试过程无法直接判断分割的实际效果。 - -**PaddleSeg支持伪彩色图作为标注图片,在原来的单通道图片基础上,注入调色板。在基本不增加图片大小的基础上,却可以显示出彩色的效果。** - -同时PaddleSeg也兼容灰度图标注,用户原来的灰度数据集可以不做修改,直接使用。 -![](../image/image-11.png) - -### 灰度标注转换为伪彩色标注 -如果用户需要转换成伪彩色标注图,可使用我们的转换工具。适用于以下两种常见的情况: -1. 如果您希望将指定目录下的所有灰度标注图转换为伪彩色标注图,则执行以下命令,指定灰度标注所在的目录即可。 -```buildoutcfg -python pdseg/tools/gray2pseudo_color.py -``` - -|参数|用途| -|-|-| -|dir_or_file|指定灰度标注所在目录| -|output_dir|彩色标注图片的输出目录| - -2. 如果您仅希望将指定数据集中的部分灰度标注图转换为伪彩色标注图,则执行以下命令,需要已有文件列表,按列表读取指定图片。 -```buildoutcfg -python pdseg/tools/gray2pseudo_color.py --dataset_dir --file_separator -``` -|参数|用途| -|-|-| -|dir_or_file|指定文件列表路径| -|output_dir|彩色标注图片的输出目录| -|--dataset_dir|数据集所在根目录| -|--file_separator|文件列表分隔符| - -### 标注教程 -用户需预先采集好用于训练、评估和测试的图片,然后使用数据标注工具完成数据标注。 - -PddleSeg已支持2种标注工具:LabelMe、精灵数据标注工具。标注教程如下: - -- [LabelMe标注教程](../transform/transform_c.md) -- [精灵数据标注工具教程](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/jingling2seg.md) - - -## 文件列表 - -### 文件列表规范 - -PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试集。在训练、评估、可视化过程前必须准备好相应的文件列表。 - -文件列表组织形式如下 -``` -原始图片路径 [SEP] 标注图片路径 -``` - -其中`[SEP]`是文件路径分割符,可以在`DATASET.SEPARATOR`配置项中修改, 默认为空格。文件列表的路径以数据集根目录作为相对路径起始点,`DATASET.DATA_DIR`即为数据集根目录。 - -如下图所示,左边为原图的图片路径,右边为图片对应的标注路径。 - -![](../image/file_list.png) - -**注意事项** - -* 务必保证分隔符在文件列表中每行只存在一次, 如文件名中存在空格,请使用"|"等文件名不可用字符进行切分 - -* 文件列表请使用**UTF-8**格式保存, PaddleSeg默认使用UTF-8编码读取file_list文件 - -若数据集缺少标注图片,则文件列表不用包含分隔符和标注图片路径,如下图所示。 - -![](../image/file_list2.png) - -**注意事项** - -此时的文件列表仅可在调用`pdseg/vis.py`进行可视化展示时使用, -即仅可在`DATASET.TEST_FILE_LIST`和`DATASET.VIS_FILE_LIST`配置项中使用。 -不可在`DATASET.TRAIN_FILE_LIST`和`DATASET.VAL_FILE_LIST`配置项中使用。 - - -**符合规范的文件列表是什么样的呢?** - -请参考[目录](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/cityscapes_demo)。 - -### 数据集目录结构整理 - -如果用户想要生成数据集的文件列表,需要整理成如下的目录结构(类似于Cityscapes数据集): - -``` -./dataset/ # 数据集根目录 -├── annotations # 标注目录 -│   ├── test -│   │   ├── ... -│   │   └── ... -│   ├── train -│   │   ├── ... -│   │   └── ... -│   └── val -│   ├── ... -│   └── ... -└── images # 原图目录 - ├── test - │   ├── ... - │   └── ... - ├── train - │   ├── ... - │   └── ... - └── val - ├── ... - └── ... 
-Note:以上目录名可任意 -``` - -### 文件列表生成 -PaddleSeg提供了生成文件列表的使用脚本,可适用于自定义数据集或cityscapes数据集,并支持通过不同的Flags来开启特定功能。 -``` -python pdseg/tools/create_dataset_list.py ${FLAGS} -``` -运行后将在数据集根目录下生成训练/验证/测试集的文件列表(文件主名与`--second_folder`一致,扩展名为`.txt`)。 - -**Note:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,仍可自动生成不含分隔符和标注图片路径的文件列表。 - -#### 命令行FLAGS列表 - -|FLAG|用途|默认值|参数数目| -|-|-|-|-| -|--type|指定数据集类型,`cityscapes`或`自定义`|`自定义`|1| -|--separator|文件列表分隔符|"|"|1| -|--folder|图片和标签集的文件夹名|"images" "annotations"|2| -|--second_folder|训练/验证/测试集的文件夹名|"train" "val" "test"|若干| -|--format|图片和标签集的数据格式|"jpg" "png"|2| -|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| - -#### 使用示例 -- **对于自定义数据集** - -若您已经按上述说明整理好了数据集目录结构,可以运行下面的命令生成文件列表。 - -``` -# 生成文件列表,其分隔符为空格,图片和标签集的数据格式都为png -python pdseg/tools/create_dataset_list.py --separator " " --format png png -``` -``` -# 生成文件列表,其图片和标签集的文件夹名为img和gt,训练和验证集的文件夹名为training和validation,不生成测试集列表 -python pdseg/tools/create_dataset_list.py \ - --folder img gt --second_folder training validation -``` -**Note:** 必须指定自定义数据集目录,可以按需要设定FLAG。无需指定`--type`。 - -- **对于cityscapes数据集** - -若您使用的是cityscapes数据集,可以运行下面的命令生成文件列表。 - -``` -# 生成cityscapes文件列表,其分隔符为逗号 -python pdseg/tools/create_dataset_list.py --type cityscapes --separator "," -``` -**Note:** - -必须指定cityscapes数据集目录,`--type`必须为`cityscapes`。 - -在cityscapes类型下,部分FLAG将被重新设定,无需手动指定,具体如下: - -|FLAG|固定值| -|-|-| -|--folder|"leftImg8bit" "gtFine"| -|--format|"png" "png"| -|--postfix|"_leftImg8bit" "_gtFine_labelTrainIds"| - -其余FLAG可以按需要设定。 diff --git a/docs/data/marker/marker_cn.md b/docs/data/marker/marker_cn.md new file mode 100644 index 0000000000..ebf96f6839 --- /dev/null +++ b/docs/data/marker/marker_cn.md @@ -0,0 +1,287 @@ +简体中文|[English](marker.md) +# 标注数据的准备 + +## 1、数据标注基础知识 + +### 1.1 标注协议 +PaddleSeg采用单通道的标注图片,每一种像素值代表一种类别,像素标注类别需要从0开始递增,例如0,1,2,3表示有4种类别。 + +**注:** 标注图像请使用PNG无损压缩格式的图片。标注类别最多为256类。 + +### 1.2 灰度标注vs伪彩色标注 +一般的分割库使用单通道灰度图作为标注图片,往往显示出来是全黑的效果。灰度标注图的弊端: +1. 对图像标注后,无法直接观察标注是否正确。 +2. 模型测试过程无法直接判断分割的实际效果。 + +**PaddleSeg支持伪彩色图作为标注图片,在原来的单通道图片基础上,注入调色板。在基本不增加图片大小的基础上,却可以显示出彩色的效果。** + +同时PaddleSeg也兼容灰度图标注,用户原来的灰度数据集可以不做修改,直接使用。 +![](../image/image-11.png) + +### 1.3 灰度标注转换为伪彩色标注 +如果用户需要转换成伪彩色标注图,可使用我们的转换工具。适用于以下两种常见的情况: +1. 如果您希望将指定目录下的所有灰度标注图转换为伪彩色标注图,则执行以下命令,指定灰度标注所在的目录即可。 +```buildoutcfg +python tools/gray2pseudo_color.py +``` + +|参数|用途| +|-|-| +|dir_or_file|指定灰度标注所在目录| +|output_dir|彩色标注图片的输出目录| + +2. 如果您仅希望将指定数据集中的部分灰度标注图转换为伪彩色标注图,则执行以下命令,需要已有文件列表,按列表读取指定图片。 +```buildoutcfg +python tools/gray2pseudo_color.py --dataset_dir --file_separator +``` +|参数|用途| +|-|-| +|dir_or_file|指定文件列表路径| +|output_dir|彩色标注图片的输出目录| +|--dataset_dir|数据集所在根目录| +|--file_separator|文件列表分隔符| + + +### 1.4 PaddleSeg如何使用数据集 +我们希望将图像的路径写入到`train.txt`,`val.txt`,`test.txt`和`labels.txt`三个文件夹中,因为PaddleSeg是通过读取这些文本文件来定位图像路径的。 +`train.txt`,`val.txt`和`test.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示: +``` +images/xxx1.jpg annotations/xxx1.png +images/xxx2.jpg annotations/xxx2.png +... +``` +`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示: +``` +labelA +labelB +... 
+``` + + +## 2、标注自定义数据集 +如果你想使用自定义数据集,你需预先采集好用于训练、评估和测试的图像,然后使用数据标注工具完成数据标注。若你想要使用Cityscapes、Pascal VOC等现成数据集,你可以跳过本步骤。 + +PddleSeg已支持2种标注工具:LabelMe、EISeg交互式分割标注工具。标注教程如下: + +- [LabelMe标注教程](../transform/transform_cn.md) +- [EISeg交互式分割标注工具教程](../../../contrib/EISeg/README.md) + +经以上工具进行标注后,请将所有的标注图像统一存放在annotations文件夹内,然后进行下一步。 + + +## 3、切分自定义数据集 + + +我们都知道,神经网络模型的训练过程通常要划分为训练集、验证集、测试集。如果你使用的是自定义数据集,PaddleSeg支持通过运行脚本的方式将数据集进行切分。若你想要使用Cityscapes、Pascal VOC等现成数据集,你可以跳过本步骤。 + +### 3.1 原图像要求 +原图像数据的尺寸应为(h, w, channel),其中h, w为图像的高和宽,channel为图像的通道数。 + +### 3.2 标注图要求 +标注图像必须为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增。 +例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。 + + +### 3.3 自定义数据集切分与文件列表生成 + +对于未划分为训练集、验证集、测试集的全部数据,PaddleSeg提供了生成切分数据并生成文件列表的脚本。 +如果你的数据集已经像 Cityscapes、Pascal VOC等一样切分完成,请直接跳到第4节。否则,请参阅以下教程: + + +### 使用脚本对自定义数据集按比例随机切分,并生成文件列表 +数据文件结构如下: +``` +./dataset/ # 数据集根目录 +|--images # 原图目录 +| |--xxx1.jpg +| |--... +| └--... +| +|--annotations # 标注图目录 +| |--xxx1.png +| |--... +| └--... +``` +其中,相应的文件名可根据需要自行定义。 + +使用命令如下,支持通过不同的Flags来开启特定功能。 +``` +python tools/split_dataset_list.py ${FLAGS} +``` +参数说明: +- dataset_root: 数据集根目录 +- images_dir_name: 原图目录名 +- labels_dir_name: 标注图目录名 + +FLAGS说明: + +|FLAG|含义|默认值|参数数目| +|-|-|-|-| +|--split|数据集切分比例|0.7 0.3 0|3| +|--separator|文件列表分隔符|" "|1| +|--format|图片和标签集的数据格式|"jpg" "png"|2| +|--label_class|标注类别|'\_\_background\_\_' '\_\_foreground\_\_'|若干| +|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| + +运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`. + +**注:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,将生成不含分隔符和标注图片路径的文件列表。 + +#### 使用示例 +``` +python tools/split_dataset_list.py images annotations --split 0.6 0.2 0.2 --format jpg png +``` + + +## 4、数据集文件整理 + +PaddleSeg采用通用的文件列表方式组织训练集、验证集和测试集。在训练、评估、可视化过程前必须准备好相应的文件列表。 + +推荐整理成如下结构: + + custom_dataset + | + |--images + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--labels + | |--label1.png + | |--label2.png + | |--... + | + |--train.txt + | + |--val.txt + | + |--test.txt + +### 4.1 文件列表规范(训练、验证) + +- 在训练与验证时,均需要提供标注图像。 + +- 即 `train.txt` 和 `val.txt` 的内容如下所示: + ``` + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... + ``` + +其中 `image1.jpg` 与 `label1.png` 分别为原始图像与其对应的标注图像。关于 `test.txt` 中的内容规范,请参照[4.2节](#4.2-文件列表规范(预测))。 + + +**注意事项** + +* 务必保证分隔符在文件列表中每行只存在一次, 如文件名中存在空格,请使用"|"等文件名不可用字符进行切分。 + +* 文件列表请使用**UTF-8**格式保存, PaddleSeg默认使用UTF-8编码读取file_list文件。 + +* 需要保证文件列表的分割符与你的Dataset类保持一致,默认分割符为`空格`。 + + +### 4.2 文件列表规范(预测) + +- 在执行预测时,模型仅使用原始图像。 + +- 即 `test.txt` 的内容如下所示: + ``` + images/image1.jpg + images/image2.jpg + ... + ``` + +- 在调用`predict.py`进行可视化展示时,文件列表中可以包含标注图像。在预测时,模型将自动忽略文件列表中给出的标注图像。因此,你可以直接使用训练、验证数据集进行预测,而不必修改 [4.1节](#4.1-文件列表规范(训练、验证))里 `train.txt` 和 `val.txt` 文件中的内容。 + + + +### 4.3 数据集目录结构整理 + +如果用户想要生成数据集的文件列表,需要整理成如下的目录结构(类似于Cityscapes数据集)。你可以手动划分,亦可参照第3节中使用脚本自动切分生成的方式。 + +``` +./dataset/ # 数据集根目录 +├── annotations # 标注图像目录 +│   ├── test +│   │   ├── ... +│   │   └── ... +│   ├── train +│   │   ├── ... +│   │   └── ... +│   └── val +│   ├── ... +│   └── ... +└── images # 原图像目录 + ├── test + │   ├── ... + │   └── ... + ├── train + │   ├── ... + │   └── ... + └── val + ├── ... + └── ... 
+注:以上目录名可任意 +``` + +### 4.4 生成文件列表 +PaddleSeg提供了生成文件列表的使用脚本,可适用于自定义数据集或cityscapes数据集,并支持通过不同的Flags来开启特定功能。 +``` +python tools/create_dataset_list.py ${FLAGS} +``` +运行后将在数据集根目录下生成训练/验证/测试集的文件列表(文件主名与`--second_folder`一致,扩展名为`.txt`)。 + +**注:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,仍可自动生成不含分隔符和标注图片路径的文件列表。 + +#### 命令行FLAGS列表 + +|FLAG|用途|默认值|参数数目| +|-|-|-|-| +|--type|指定数据集类型,`cityscapes`或`自定义`|`自定义`|1| +|--separator|文件列表分隔符|"|"|1| +|--folder|图片和标签集的文件夹名|"images" "annotations"|2| +|--second_folder|训练/验证/测试集的文件夹名|"train" "val" "test"|若干| +|--format|图片和标签集的数据格式|"jpg" "png"|2| +|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| + +#### 使用示例 +- **对于自定义数据集** + +若您已经按上述说明整理好了数据集目录结构,可以运行下面的命令生成文件列表。 + +``` +# 生成文件列表,其分隔符为空格,图片和标签集的数据格式都为png +python tools/create_dataset_list.py --separator " " --format jpg png +``` +``` +# 生成文件列表,其图片和标签集的文件夹名为img和gt,训练和验证集的文件夹名为training和validation,不生成测试集列表 +python tools/create_dataset_list.py \ + --folder img gt --second_folder training validation +``` +**注:** 必须指定自定义数据集目录,可以按需要设定FLAG。无需指定`--type`。 + +- **对于cityscapes数据集** + +若您使用的是cityscapes数据集,可以运行下面的命令生成文件列表。 + +``` +# 生成cityscapes文件列表,其分隔符为逗号 +python tools/create_dataset_list.py --type cityscapes --separator "," +``` +**注:** + +必须指定cityscapes数据集目录,`--type`必须为`cityscapes`。 + +在cityscapes类型下,部分FLAG将被重新设定,无需手动指定,具体如下: + +|FLAG|固定值| +|-|-| +|--folder|"leftImg8bit" "gtFine"| +|--format|"jpg" "png"| +|--postfix|"_leftImg8bit" "_gtFine_labelTrainIds"| + +其余FLAG可以按需要设定。 + + + +运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`。PaddleSeg是通过读取这些文本文件来定位图像路径的。 diff --git a/docs/data/transform/transform.md b/docs/data/transform/transform.md index a706d0746a..e53df1dde3 100644 --- a/docs/data/transform/transform.md +++ b/docs/data/transform/transform.md @@ -1,104 +1,106 @@ -# 数据标注和转换 +English|[简体中文](transform_cn.md) +# Annotating Tutorial -以LabelMe为例说明数据标注的流程 +Whether it is semantic segmentation, panoramic segmentation, or instance segmentation, we all need sufficient training data. If you want to use an unlabeled original dataset for segmentation tasks, you must first annotate the original image. If you are using a dataset with segmentation annotations such as `Cityscapes`, you can skip this step. +- This document will provide tutorials for using 2 kinds of labeling tools: EISeg, LabelMe. -## 1 LabelMe的安装 -用户在采集完用于训练、评估和预测的图片之后,需使用数据标注工具[LabelMe](https://github.com/wkentaro/labelme)完成数据标注。LabelMe支持在Windows/macOS/Linux三个系统上使用,且三个系统下的标注格式是一样。具体的安装流程请参见[官方安装指南](https://github.com/wkentaro/labelme)。 +# 1、LabelMe +* 1.Installation -## 2 LabelMe的使用 +After the user has collected the images for training, evaluation, and prediction, he needs to use the data labeling tool [LabelMe] (https://github.com/wkentaro/labelme) to complete the data labeling. LabelMe supports the use of Windows/macOS/Linux three systems, and the label format under the three systems is the same. For the specific installation process, please refer to the [Official Installation Guide](https://github.com/wkentaro/labelme). -打开终端输入`labelme`会出现LableMe的交互界面,可以先预览`LabelMe`给出的已标注好的图片,再开始标注自定义数据集。 +* 2.Using + +Open the terminal and enter `labelme`, and an interactive interface of LabelMe will appear. You can preview the marked image given by `LabelMe` before starting to label the custom dataset. ![](../image/image-1.png)

-图1 LableMe交互界面的示意图
+Figure 1 : Schematic diagram of the LabelMe interactive interface

+ * Preview annotated Images

-* 预览已标注图片
-
-获取`LabelMe`的源码:
+Get the `LabelMe` source code:

```
git clone https://github.com/wkentaro/labelme
```

-终端输入`labelme`会出现LableMe的交互界面,点击`OpenDir`打开`<path_to_labelme>/examples/semantic_segmentation/data_annotated`,其中`<path_to_labelme>`为克隆下来的`labelme`的路径,打开后示意的是语义分割的真值标注。
-
+Enter `labelme` in the terminal and the LabelMe interactive interface will appear. Click `OpenDir` to open `<path_to_labelme>/examples/semantic_segmentation/data_annotated`, where `<path_to_labelme>` is the path of the cloned `labelme`. The opened directory shows the ground-truth annotations for semantic segmentation.

![](../image/image-2.png)
-图2 已标注图片的示意图
+Figure 2 : Schematic diagram of annotated images

-* 开始标注 + * Start Annotating -请按照下述步骤标注数据集: +Please follow the steps below to label the dataset: -​ (1) 点击`OpenDir`打开待标注图片所在目录,点击`Create Polygons`,沿着目标的边缘画多边形,完成后输入目标的类别。在标注过程中,如果某个点画错了,可以按撤销快捷键可撤销该点。Mac下的撤销快捷键为`command+Z`。 +​ (1) Click `OpenDir` to open the directory where the picture to be labeled is located, click `Create Polygons`, draw polygons along the edges of the target, and enter the target category after completion. During the marking process, if a point is drawn wrong, you can press the undo shortcut key to undo the point. The undo shortcut key under Mac is `command+Z`. ![](../image/image-3.png)
-图3 标注单个目标的示意图
+Figure 3 : Schematic diagram of marking a single target

- (2) 右击选择`Edit Polygons`可以整体移动多边形的位置,也可以移动某个点的位置;右击选择`Edit Label`可以修改每个目标的类别。请根据自己的需要执行这一步骤,若不需要修改,可跳过。

+ (2) Right-click and select `Edit Polygons` to move the whole polygon or an individual point; right-click and select `Edit Label` to modify the category of each target. Perform this step as needed; if no changes are required, you can skip it.

![](../image/image-4-2.png)
-图4 修改标注的示意图
+Figure 4 : Schematic diagram of modified annotation

- (3) 图片中所有目标的标注都完成后,点击`Save`保存json文件,**请将json文件和图片放在同一个文件夹里**,点击`Next Image`标注下一张图片。

-LableMe产出的真值文件可参考我们给出的[文件夹](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo)。

+ (3) After annotating all targets in the picture, click `Save` to save the json file. **Please keep the json file and the picture in the same folder**, then click `Next Image` to annotate the next picture.

+For reference ground-truth files produced by LabelMe, see the [folder](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo) we provide.

![](../image/image-5.png)
-图5 LableMe产出的真值文件的示意图
+Figure 5 : Schematic diagram of the ground-truth files produced by LabelMe

**Note:**

-对于中间有空洞的目标的标注方法:在标注完目标轮廓后,再沿空洞区域边缘画多边形,并将其指定为其他类别,如果是背景则指定为`_background_`。如下:
+How to annotate a target with a hole in the middle: after outlining the target, draw another polygon along the edge of the hole area and assign it to a different category; if the hole is background, assign it to `_background_`, as follows:

![](../image/image-10.jpg)
-图6 带空洞目标的标注示意图
+Figure 6 : Schematic diagram of annotating a target with holes

- ## 3 数据格式转换 +* 3.Data format conversion -最后用我们提供的数据转换脚本将上述标注工具产出的数据格式转换为模型训练时所需的数据格式。 +Finally, use the data conversion script we provide to convert the data format produced by the above-mentioned annotation tool into the data format required for model training. -* 经过数据格式转换后的数据集目录结构如下: +* The dataset directory structure after data format conversion is as follows: ``` - my_dataset # 根目录 - |-- annotations # 数据集真值 - | |-- xxx.png # 像素级别的真值信息 + my_dataset # Root directory + |-- annotations # Ground-truth + | |-- xxx.png # Pixel-level truth information | |... - |-- class_names.txt # 数据集的类别名称 - |-- xxx.jpg(png or other) # 数据集原图 + |-- class_names.txt # The category name of the dataset + |-- xxx.jpg(png or other) # Original image of dataset |-- ... - |-- xxx.json # 标注json文件 + |-- xxx.json # Json file,used to save annotation information |-- ... ``` @@ -106,29 +108,36 @@ LableMe产出的真值文件可参考我们给出的[文件夹](https://github.c ![](../image/image-6.png)
-图7 格式转换后的数据集目录的结构示意图
+Figure 7 : Schematic diagram of the dataset directory structure after format conversion

-* 运行以下代码,将标注后的数据转换成满足以上格式的数据集:
+* 4.Run the following code to convert the annotated data into a dataset that meets the above format:

```
- python pdseg/tools/labelme2seg.py <PATH/TO/LABEL_JSON_FILE>
+ python tools/labelme2seg.py <PATH/TO/LABEL_JSON_FILE>
```

-其中,`<PATH/TO/LABEL_JSON_FILE>`为图片以及LabelMe产出的json文件所在文件夹的目录,同时也是转换后的标注集所在文件夹的目录。
+Here, `<PATH/TO/LABEL_JSON_FILE>` is the directory of the folder containing the images and the json files produced by LabelMe; it is also the folder where the converted annotations are written.

-我们已内置了一个标注的示例,可运行以下代码进行体验:
+We provide a built-in annotation example; you can run the following code to try it out:

```
-python pdseg/tools/labelme2seg.py docs/annotation/labelme_demo/
+python tools/labelme2seg.py legacy/docs/annotation/labelme_demo/
```

-转换得到的数据集可参考我们给出的[文件夹](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo)。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`annotations`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。
+For a reference of the converted dataset, see the [folder](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo) we provide. The file `class_names.txt` lists the names of all annotation categories in the dataset, including the background class; the folder `annotations` stores the pixel-level ground truth of each picture, where the background class `_background_` corresponds to 0 and the other target categories start from 1 and increase, up to a maximum of 255.

![](../image/image-7.png)
-图8 格式转换后的数据集各目录的内容示意图
+Figure 8 : Schematic diagram of the contents of each dataset directory after format conversion
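As a quick sanity check of the converted output, the sketch below (illustrative only, not part of PaddleSeg; the file names follow the example structure above) maps the pixel values of one converted annotation back to the names listed in `class_names.txt`:

```python
import numpy as np
from PIL import Image

dataset_dir = "my_dataset"  # example: the converted dataset root shown above

# class_names.txt holds one category name per line; the line index corresponds
# to the pixel value, with the background class `_background_` at index 0.
with open(dataset_dir + "/class_names.txt", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

# Load one converted annotation (example file name) and count pixels per class.
label = np.asarray(Image.open(dataset_dir + "/annotations/xxx1.png"))
values, counts = np.unique(label, return_counts=True)
for value, count in zip(values, counts):
    name = class_names[value] if value < len(class_names) else "unknown(%d)" % value
    print("%s: %d pixels" % (name, count))
```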

+ +# 2、EISeg + +For the use of EISeg, please refer to [Document](../../../contrib/EISeg/README.md) + + + diff --git a/docs/data/transform/transform_c.md b/docs/data/transform/transform_cn.md similarity index 75% rename from docs/data/transform/transform_c.md rename to docs/data/transform/transform_cn.md index 6e6363f1d0..e1bfe5d1e5 100644 --- a/docs/data/transform/transform_c.md +++ b/docs/data/transform/transform_cn.md @@ -1,69 +1,93 @@ -# LabelMe数据标注教程 +简体中文|[English](transform.md) +# 数据标注教程 -## 1 LabelMe的安装 +无论是语义分割,全景分割,还是实例分割,我们都需要充足的训练数据。如果你想使用没有标注的原始数据集做分割任务,你必须先为原始图像作出标注。如果你使用的是Cityscapes等已有分割标注的数据集,你可以跳过本步骤。 +- 本文档将提供2种标注工具的使用教程:EISeg、LabelMe。 + +# 一、LabelMe +* 1.LabelMe的安装 用户在采集完用于训练、评估和预测的图片之后,需使用数据标注工具[LabelMe](https://github.com/wkentaro/labelme)完成数据标注。LabelMe支持在Windows/macOS/Linux三个系统上使用,且三个系统下的标注格式是一样。具体的安装流程请参见[官方安装指南](https://github.com/wkentaro/labelme)。 -## 2 LabelMe的使用 +* 2.LabelMe的使用 打开终端输入`labelme`会出现LableMe的交互界面,可以先预览`LabelMe`给出的已标注好的图片,再开始标注自定义数据集。 -
+![](../image/image-1.png)

图1 LableMe交互界面的示意图

-* 预览已标注图片 + + * 预览已标注图片 获取`LabelMe`的源码: + ``` git clone https://github.com/wkentaro/labelme ``` + 终端输入`labelme`会出现LableMe的交互界面,点击`OpenDir`打开`/examples/semantic_segmentation/data_annotated`,其中``为克隆下来的`labelme`的路径,打开后示意的是语义分割的真值标注。 -
+![](../image/image-2.png)

图2 已标注图片的示意图

-* 开始标注 + + + * 开始标注 请按照下述步骤标注数据集: ​ (1) 点击`OpenDir`打开待标注图片所在目录,点击`Create Polygons`,沿着目标的边缘画多边形,完成后输入目标的类别。在标注过程中,如果某个点画错了,可以按撤销快捷键可撤销该点。Mac下的撤销快捷键为`command+Z`。 -
+![](../image/image-3.png)

图3 标注单个目标的示意图

+ + ​ (2) 右击选择`Edit Polygons`可以整体移动多边形的位置,也可以移动某个点的位置;右击选择`Edit Label`可以修改每个目标的类别。请根据自己的需要执行这一步骤,若不需要修改,可跳过。 -
+![](../image/image-4-2.png)

图4 修改标注的示意图

+ + ​ (3) 图片中所有目标的标注都完成后,点击`Save`保存json文件,**请将json文件和图片放在同一个文件夹里**,点击`Next Image`标注下一张图片。 LableMe产出的真值文件可参考我们给出的[文件夹](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo)。 -
+![](../image/image-5.png)

图5 LableMe产出的真值文件的示意图

+ + **Note:** 对于中间有空洞的目标的标注方法:在标注完目标轮廓后,再沿空洞区域边缘画多边形,并将其指定为其他类别,如果是背景则指定为`_background_`。如下: -
+![](../image/image-10.jpg)

图6 带空洞目标的标注示意图

- ## 3 数据格式转换 + + +* 3.数据格式转换 + 最后用我们提供的数据转换脚本将上述标注工具产出的数据格式转换为模型训练时所需的数据格式。 * 经过数据格式转换后的数据集目录结构如下: @@ -81,15 +105,18 @@ LableMe产出的真值文件可参考我们给出的[文件夹](https://github.c ``` -
+![](../image/image-6.png)

图7 格式转换后的数据集目录的结构示意图

-* 运行以下代码,将标注后的数据转换成满足以上格式的数据集: + + +* 4.运行以下代码,将标注后的数据转换成满足以上格式的数据集: ``` - python pdseg/tools/labelme2seg.py + python tools/labelme2seg.py ``` 其中,``为图片以及LabelMe产出的json文件所在文件夹的目录,同时也是转换后的标注集所在文件夹的目录。 @@ -97,12 +124,20 @@ LableMe产出的真值文件可参考我们给出的[文件夹](https://github.c 我们已内置了一个标注的示例,可运行以下代码进行体验: ``` -python pdseg/tools/labelme2seg.py docs/annotation/labelme_demo/ +python tools/labelme2seg.py docs/annotation/labelme_demo/ ``` 转换得到的数据集可参考我们给出的[文件夹](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.8.0/docs/annotation/labelme_demo)。其中,文件`class_names.txt`是数据集中所有标注类别的名称,包含背景类;文件夹`annotations`保存的是各图片的像素级别的真值信息,背景类`_background_`对应为0,其它目标类别从1开始递增,至多为255。 -
+![](../image/image-7.png)

图8 格式转换后的数据集各目录的内容示意图

+ + +# 二、EISeg +EISeg的使用请参考[文档](../../../contrib/EISeg/README.md) + + + diff --git a/docs/deployment/inference/inference.md b/docs/deployment/inference/inference.md new file mode 100644 index 0000000000..03503d2203 --- /dev/null +++ b/docs/deployment/inference/inference.md @@ -0,0 +1,42 @@ +# 本地Inference部署 + +## 1. 说明 + +本文档介绍使用飞桨推理的Python接口在服务器端部署分割模型。大家通过一定的配置,加上少量的代码,即可把模型集成到自己的服务中,完成图像分割的任务。 + +飞桨推理的[官网文档](https://paddleinference.paddlepaddle.org.cn/product_introduction/summary.html)介绍了部署模型的步骤、多种API接口、示例等等,大家可以根据实际需求进行使用。 + +## 2. 前置准备 + +请使用[模型导出工具](../../model_export.md)导出您的模型, 或点击下载我们的[样例模型](https://paddleseg.bj.bcebos.com/dygraph/demo/bisenet_demo_model.tar.gz)用于测试。 + +接着准备一张测试图片用于试验效果,我们提供了cityscapes验证集中的一张[图片](https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png)用于演示效果,如果您的模型是使用其他数据集训练的,请自行准备测试图片。 + +## 3. 预测 + +在终端输入以下命令进行预测: +```shell +python deploy/python/infer.py --config /path/to/deploy.yaml --image_path +``` + +参数说明如下: +|参数名|用途|是否必选项|默认值| +|-|-|-|-| +|config|**导出模型时生成的配置文件**, 而非configs目录下的配置文件|是|-| +|image_path|预测图片的路径或者目录|是|-| +|use_cpu|是否使用X86 CPU预测,默认是使用GPU预测|否|否| +|use_trt|是否开启TensorRT来加速预测|否|否| +|use_int8|启动TensorRT预测时,是否以int8模式运行|否|否| +|use_mkldnn|是否开启MKLDNN进行加速预测|否|否| +|batch_size|单卡batch size|否|配置文件中指定值| +|save_dir|保存预测结果的目录|否|output| +|with_argmax|对预测结果进行argmax操作|否|否| + +*测试样例和预测结果如下* +![cityscape_predict_demo.png](../../images/cityscapes_predict_demo.png) + +**注意** + +1. 当使用量化模型预测时,需要同时开启TensorRT预测和int8预测才会有加速效果 + +2. 使用TensorRT需要使用支持TRT功能的Paddle库,请参考[附录](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/Tables.html#whl-release)下载对应的PaddlePaddle安装包,或者参考[源码编译](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/compile/fromsource.html)自行编译。 \ No newline at end of file diff --git a/docs/design/create/add_new_model.md b/docs/design/create/add_new_model.md index d8a44c3db8..c39a76782e 100644 --- a/docs/design/create/add_new_model.md +++ b/docs/design/create/add_new_model.md @@ -1,10 +1,13 @@ +English | [简体中文](add_new_model_cn.md) # Add New Components -PaddleSeg provides five types of extensible components, i.e. MODELS, LOSSES, TRANSFORMS, BACKBONES, DATASETS. +> PaddleSeg provides five types of extensible components, i.e. MODELS, LOSSES, TRANSFORMS, BACKBONES, DATASETS. + +> PaddleSeg uses object-oriented design ideas. When creating your own model, please write it in the form of Python class. ## A New Model -If you intent to design a customized model, e.g, NewNet in newnet.py: +> If you intent to design a customized model, e.g, NewNet in newnet.py (you are allowed to give your model any name, but don't conflict with an existing model name): ```python import paddle.nn as nn @@ -18,36 +21,68 @@ class NewNet(nn.Layer): pass ``` -**Step 1**: Put newnet.py under paddleseg/models/. +* **Step 1**: Put newnet.py under paddleseg/models/. -**Step 2**: Add @manager.MODELS.add_component above your model class, where the manager is a component container, inclduing MODELS, BACKBONES, DATASETS, TRANSFORMS, LOSSES. +* **Step 2**: Add @manager.MODELS.add_component above your model class, where the manager is a component container, inclduing MODELS, BACKBONES, DATASETS, TRANSFORMS, LOSSES.When you add this decorator and specify the parameters reasonably during training, PaddleSeg can automatically add the modules you implement to the training configuration, reflecting the design idea of low coupling. 
-**Step 3**: Import your class in paddleseg/models/\_\_init\_\_.py, like this: +* **Step 3**: Import your class in paddleseg/models/\_\_init\_\_.py, like this: ```python -from .newnet import NewNet +from .backbones import * # Backbone network classes that have been implemented +from .losses import * # Losses classes that have been implemented + +from .ann import * #Currently 21 segmentation models have been implemented. You will add your own custom segmentation model following the same rules. +from .bisenet import * +from .danet import * +from .deeplab import * +from .fast_scnn import * +from .fcn import * +from .gcnet import * +from .ocrnet import * +from .pspnet import * +from .gscnn import GSCNN +from .unet import UNet +from .hardnet import HarDNet +from .u2net import U2Net, U2Netp +from .attention_unet import AttentionUNet +from .unet_plusplus import UNetPlusPlus +from .unet_3plus import UNet3Plus +from .decoupled_segnet import DecoupledSegNet +from .emanet import * +from .isanet import * +from .dnlnet import * +from .sfnet import * +from .shufflenet_slim import ShuffleNetV2 # Please do not change the above import, otherwise it will cause some models to be unavailable + +from .newnet import NewNet # Please add your own segmentation model class here. ``` +- Note: If you only implement your own class without importing your class in the models constructor, PaddleSeg will not be able to recognize the model you added! -**Step 4**: Specify the model name in a yaml file: +* **Step 4**: Specify the model name in a yaml file:(This parameter must be consistent with the NewNet class name in newnet.py, please do not enter the name of the python file by mistake). -```python +> In addition, please remember the full path of the created yaml file so that you can use the configuration you want when setting the model configuration parameters for train.py later. It is recommended to save the yaml files of the model in the corresponding model directory of PaddleSeg/configs. + +```yaml model: - type: NewNet + type: NewNet # Your custom model name. param1: ... param2: ... param3: ... ``` -Note: If your model has more than one output, i.e. main loss + auxiliary losses, you have to modify loss in the yaml file, otherwise it will throw out an error like "The length of logits should equal to the types of loss config: 2!=1.". For example, PSPNet has two losses, where both are CrossEntropyLoss, and the weight of auxilary loss is 0.4, thus we have the loss settings: -```python +- Note: If your model has more than one output, i.e. main loss + auxiliary losses, you have to modify loss in the yaml file, otherwise it will throw out an error like "The length of logits should equal to the types of loss config: 2!=1.". For example, PSPNet has two losses, where both are CrossEntropyLoss, and the weight of auxilary loss is 0.4, thus we have the loss settings: + +```yaml loss: types: - type: CrossEntropyLoss - coef: [1, 0.4] + coef: [1, 0.4] # The main loss and auxiliary loss are assigned different proportions, that is, the main loss has a greater impact on the final loss. ``` + ## A New Loss -If you intent to implement a new loss, e.g. NewLoss in new_loss.py. +> If you intent to implement a new loss, e.g. NewLoss in new_loss.py (you are allowed to give your model any name, but don't conflict with an existing loss name): + ```python import paddle.nn as nn @@ -61,28 +96,46 @@ class NewLoss(nn.Layer): pass ``` -**Step 1**: Put new_loss.py under paddleseg/models/losses. 
-**Step 2**: Add @manager.LOSSES.add_component above your loss class. -**Step 3**: Import your class in paddleseg/models/losses/\_\_init\_\_.py, like this: +* **Step 1**: Put new_loss.py under paddleseg/models/losses. + +* **Step 2**: Add @manager.LOSSES.add_component above your loss class. + +* **Step 3**: Import your class in paddleseg/models/losses/\_\_init\_\_.py, like this: + ```python -from .new_loss import NewLoss +from .mixed_loss import MixedLoss +from .cross_entropy_loss import CrossEntropyLoss +from .binary_cross_entropy_loss import BCELoss +from .lovasz_loss import LovaszSoftmaxLoss, LovaszHingeLoss +from .gscnn_dual_task_loss import DualTaskLoss +from .edge_attention_loss import EdgeAttentionLoss +from .bootstrapped_cross_entropy import BootstrappedCrossEntropyLoss +from .dice_loss import DiceLoss +from .ohem_cross_entropy_loss import OhemCrossEntropyLoss +from .decoupledsegnet_relax_boundary_loss import RelaxBoundaryLoss +from .ohem_edge_attention_loss import OhemEdgeAttentionLoss +from .l1_loss import L1Loss +from .mean_square_error_loss import MSELoss # Please do not change the above import, otherwise it will cause some loss functions to be unavailable. + +from .new_loss import NewLoss # Please add your own loss function class here. ``` -**Step 4**: Specify the loss name in a yaml file: +* **Step 4**: Specify the loss name in a yaml file: -```python +```yaml loss: types: - - type: NewLoss + - type: NewLoss # The name of your custom loss function param1: ... coef: [1] ``` -## A New Transform +## A New Transform(Data Augmentation) + -If you intent to implement a new transform (data augmentation), e.g. NewTrans. +> If you intent to implement a new transform (data augmentation), e.g. NewTrans (you are allowed to give your model any name, but don't conflict with an existing transform name): ```python @@ -100,23 +153,37 @@ class NewTrans(nn.Layer): return (im, label) ``` -**Step 1**: Define the NewTrans class in paddleseg/transforms/transforms.py. -**Step 2**: Add @manager.TRANSFORMS.add_component above your transform class. That's all. +* **Step 1**: Define the NewTrans class in paddleseg/transforms/transforms.py. -**Step 3**: Specify the transform name in a yaml file: +* **Step 2**: Add @manager.TRANSFORMS.add_component above your transform class. That's all. +- Please note that it is no longer necessary to import this class into the constructor of transforms. In PaddleSeg, the transform component integrates all classes in one file. You can check PaddleSeg/paddleseg/transforms/\_\_init\_\_.py to see the file content and the above mentioned paddleseg/models/\_\_init\_\_.py and paddleseg/models/ What is the difference in the file content of losses/\_\_init\_\_.py. ```python +from .transforms import * +from . import functional + +# As you can see, in the PaddleSeg/paddleseg/transforms/\_\_init\_\_.py file, import all existing data transformation strategies with from .transforms import *. + +# Therefore, after your custom transform class is written, it will be automatically added during the creation of the class object. +``` + +* **Step 3**: Specify the type parameter in the yaml file as the name of the data transformation (data enhancement) you created: + +```yaml train_dataset: transforms: - - type: NewTrans + - type: NewTrans # Your custom data transformation name param1: ... ``` -Note: For better readability,please implement detailed transformation functions in paddleseg/transforms/functional.py. 
+ +> Note: For better readability, please implement detailed conversion functions in paddleseg/transforms/functional.py. + ## A New Backbone -If you intent to add a new backbone network, e.g. NewBackbone in new_backbone.py. + +> If you intent to add a new backbone network, e.g. NewBackbone in new_backbone.py(you are allowed to give your model any name, but don't conflict with an existing backbone name): ```python import paddle.nn as nn @@ -130,27 +197,36 @@ class NewBackbone(nn.Layer): pass ``` -**Step 1**: Put new_backbone.py under paddleseg/models/backbones. -**Step 2**: Add @manager.BACKBONES.add_component above your backbone class. -**Step 3**: Import your class in paddleseg/models/backbones/\_\_init\_\_.py, like this: +* **Step 1**: Put new_backbone.py under paddleseg/models/backbones. + +* **Step 2**: Add @manager.BACKBONES.add_component above your backbone class. + +* **Step 3**: Import your class in paddleseg/models/backbones/\_\_init\_\_.py, like this: ```python -from .new_backbone import NewBackbone +# Currently supports 4 types of backbone networks. +from .hrnet import * +from .resnet_vd import * +from .xception_deeplab import * +from .mobilenetv3 import * # Please do not modify the above import, otherwise it will cause some backbone networks to be unavailable. + +from .new_backbone import NewBackbone # Please add your own backbone network class here. ``` -**Step 4**: Specify the backbone name in a yaml file: +* **Step 4**: Specify the backbone name in a yaml file: -```python +```yaml model: backbone: - type: NewBackbone + type: NewBackbone # Your custom backbone network name. param1: ... ``` ## A New Dataset -If you intent to add a new dataset, e.g. NewData in new_data.py. + +> If you intent to add a new dataset, e.g. NewData in new_data.py: ```python from paddleseg..dataset import Dataset @@ -165,20 +241,90 @@ class NewData(Dataset): pass ``` -**Step 1**: Put new_data.py under paddleseg/datasets. -**Step 2**: Add @manager.DATASETS.add_component above your dataset class. +* **Step 1**: Put new_data.py under paddleseg/datasets. -**Step 3**: Import your class in paddleseg/datasets/\_\_init\_\_.py, like this: +* **Step 2**: Add @manager.DATASETS.add_component above your dataset class. + +* **Step 3**: Import your class in paddleseg/datasets/\_\_init\_\_.py, like this: ```python -from .new_data import NewData +from .dataset import Dataset +from .cityscapes import Cityscapes +from .voc import PascalVOC +from .ade import ADE20K +from .optic_disc_seg import OpticDiscSeg +from .pascal_context import PascalContext +from .mini_deep_globe_road_extraction import MiniDeepGlobeRoadExtraction # Please do not change the above import, otherwise it will cause some data sets to be unavailable. + +from .new_data import NewData # Please add your own implementation of the dataset class here. ``` +* **Step 4**: Specify the backbone name in a yaml file: -**Step 4**: Specify the backbone name in a yaml file: - -```python +```yaml train_dataset: - type: NewData + type: NewData # Your custom data set name. dataset_root: ... mode: train ``` + + +# Example +> Suppose we have written five custom components according to the above Steps: MODELS (NewNet), LOSSES (NewLoss), TRANSFORMS (NewTrans), BACKBONES (NewBackbone), DATASETS (NewData). +> Suppose we want to write a yaml file for the training of a custom model to set the parameters used this time. 
Here we are mainly concerned with the configuration of custom component parameters, other parameters (such as optimizers) are not introduced too much, and it is recommended to use the reference configuration. Such as: +```yaml +batch_size: 4 # Set the number of pictures sent to the network at one iteration. Generally speaking, the larger the video memory of the machine you are using, the higher the batch_size value. + +iters: 10000 # Number of iterations. + +model: + type: NewNet # The name of the custom model class. + backbone: + type: NewBackbone # Customize the name of the backbone network class. + pretrained: Null # If you implement the parameters of the trained backbone network, please specify its storage path. + num_classes: 5 # Label the number of pixel categories in the image. Your model should be designed according to the specific segmentation task, so you know the number of pixel categories under the task. + pretrained: Null # If you have network and pre-training parameters, please specify the storage path. + backbone_indices: [-1] + +loss: + types: + - type: NewLoss # The name of the custom loss function class + coef: [1] # If multiple losses are used, the length of the list is consistent with the number of losses. + + +train_dataset: + type: NewData # The name of the custom dataset class + dataset_root: data/custom_data # Please refer to the README file, organize the files needed for the segmentation task according to its recommended organization structure, and place them in the corresponding project path. It is recommended to put it under data/ + transforms: + - type: NewTrans # The name of the custom data conversion (data enhancement) class. + custom_factor: 0.5 + mode: train # Set training mode for training set. + + +val_dataset: + type: NewData + dataset_root: data/custom_data + transforms: + - type: Normalize + mode: val # Set the verification mode for the verification set. + +optimizer: # Optimizer settings. + type: sgd + momentum: 0.9 + weight_decay: 4.0e-5 + +lr_scheduler: # Learning rate setting. 
+ type: PolynomialDecay + learning_rate: 0.01 + power: 0.9 + end_lr: 0 +``` + +Suppose we save the above yaml file as PaddleSeg/configs/custom_configs/NewNet_NewLoss_NewTrans_NewBackbone_NewData.yml, please switch to the PaddleSeg directory and run the following command: +``` +python train.py \ + --config configs/custom_configs/NewNet_NewLoss_NewTrans_NewBackbone_NewData.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` \ No newline at end of file diff --git a/docs/design/create/add_new_model_cn.md b/docs/design/create/add_new_model_cn.md new file mode 100644 index 0000000000..10f0ddc2af --- /dev/null +++ b/docs/design/create/add_new_model_cn.md @@ -0,0 +1,330 @@ +简体中文 | [English](add_new_model.md) +# 添加组件 + +> PaddleSeg 提供了五种类型的可扩展组件,即 MODELS、LOSSES、TRANSFORMS、BACKBONES、DATASETS。 + +> PaddleSeg使用基于面向对象的设计思想,在创造你自己的模型时,请以 Python class 的形式编写。 + +## 创建自定义分割模型 + +> 如果你打算设计一个自定义分割模型,例如在 newnet.py 中实现 NewNet 类(你可以为你的模型起任何名字,但不要与已有模型的名字重复): + +```python +import paddle.nn as nn +from paddleseg.cvlibs import manager + +@manager.MODELS.add_component +class NewNet(nn.Layer): + def __init__(self, param1, param2, param3): + pass + def forward(self, x): + pass +``` + +* **步骤 1**: 将 newnet.py 文件放置在目录 paddleseg/models/ 下。 + +* **步骤 2**: 在你的自定义模型类的上方添加一个Python装饰器 ``@manager.MODELS.add_component``。manager 是一个组件容器,包括 MODELS、BACKBONES、DATASETS、TRANSFORMS、LOSSES。当你添加了这个装饰器并在训练时合理的指定参数,PaddleSeg就可以自动将你实现的模块添加到训练配置中,体现了低耦合的设计思想。 + +* **步骤 3**: 在 paddleseg/models/\_\_init\_\_.py 中导入你的自定义分割模型类,如下所示: +```python +from .backbones import * # 已经实现的骨干网络类 +from .losses import * # 已经实现的损失函数类 + +from .ann import * #目前已经实现的21种分割模型。你将按照同样的规则添加自己的自定义分割模型。 +from .bisenet import * +from .danet import * +from .deeplab import * +from .fast_scnn import * +from .fcn import * +from .gcnet import * +from .ocrnet import * +from .pspnet import * +from .gscnn import GSCNN +from .unet import UNet +from .hardnet import HarDNet +from .u2net import U2Net, U2Netp +from .attention_unet import AttentionUNet +from .unet_plusplus import UNetPlusPlus +from .unet_3plus import UNet3Plus +from .decoupled_segnet import DecoupledSegNet +from .emanet import * +from .isanet import * +from .dnlnet import * +from .sfnet import * +from .shufflenet_slim import ShuffleNetV2 # 以上导入请不要改动,否则会导致一些模型不可用 + +from .newnet import NewNet # 请在这里添加你自己实现的分割模型类 +``` +- 注意:如果仅实现了自己的类,而不在 models 的构造函数中导入你的类,PaddleSeg将无法识别你添加的模型! + +* **步骤 4**: 在 yaml 文件中将 type 参数指定为你所创建的分割模型的名称(该参数必须与 newnet.py 中的 NewNet 类名保持一致,请不要误输为python文件的名称)。 + +> 另外,请你记住所创建的 yaml 文件的完整路径,以便后续在为 train.py 设置模型配置参数时使用你想要的配置。建议将模型的 yaml 文件都保存在 PaddleSeg/configs 的对应模型目录下。 + +```yaml +model: + type: NewNet # 你的自定义模型名称 + param1: ... + param2: ... + param3: ... 
+``` + +- 注意:如果你的模型有多个输出,即损失 = 主损失 + 辅助损失,则你必须修改 yaml 文件中的相应参数,否则会抛出“ logits 的长度应该等于 loss 配置的类型数目: 2!=1。”的错误。 比如 PSPNet 有两个 loss,且都是 CrossEntropyLoss,辅助loss的权重是0.4,所以我们对 loss 的设置如下: + +```yaml +loss: + types: + - type: CrossEntropyLoss + coef: [1, 0.4] #为主损失和辅助损失分配不同的比重,即主损失对最终损失影响更大。 +``` + + +## 创建自定义损失函数 + +> 如果你打算设计一个自定义损失函数,例如在 new_loss.py 中实现 NewLoss 类(你可以为你的损失函数起任何名字,但不要与已有损失函数的名字重复): + + +```python +import paddle.nn as nn +from paddleseg.cvlibs import manager + +@manager.LOSSES.add_component +class NewLoss(nn.Layer): + def __init__(self, param1, ignore_index=255): + pass + def forward(self, x): + pass +``` + + + +* **步骤 1**: 将 new_loss.py 文件放置在目录 paddleseg/models/losses 下。 + +* **步骤 2**: 在你的自定义损失函数类的上方添加一个Python装饰器 ``@manager.LOSSES.add_component``。 + +* **步骤 3**: 在 paddleseg/models/losses/\_\_init\_\_.py 中导入你的自定义损失函数类,如下所示: + +```python +from .mixed_loss import MixedLoss +from .cross_entropy_loss import CrossEntropyLoss +from .binary_cross_entropy_loss import BCELoss +from .lovasz_loss import LovaszSoftmaxLoss, LovaszHingeLoss +from .gscnn_dual_task_loss import DualTaskLoss +from .edge_attention_loss import EdgeAttentionLoss +from .bootstrapped_cross_entropy import BootstrappedCrossEntropyLoss +from .dice_loss import DiceLoss +from .ohem_cross_entropy_loss import OhemCrossEntropyLoss +from .decoupledsegnet_relax_boundary_loss import RelaxBoundaryLoss +from .ohem_edge_attention_loss import OhemEdgeAttentionLoss +from .l1_loss import L1Loss +from .mean_square_error_loss import MSELoss # 以上导入请不要改动,否则会导致一些损失函数不可用 + +from .new_loss import NewLoss # 请在这里添加你自己实现的损失函数类 +``` + +* **步骤 4**: 在 yaml 文件中将 type 参数指定为你所创建的损失函数的名称。 + +```yaml +loss: + types: + - type: NewLoss # 你的自定义损失函数名称 + param1: ... + coef: [1] +``` + +## 创建自定义数据变换(数据增强) + + +> 如果你打算设计一个自定义数据变换(数据增强),例如在 transforms.py 中新实现一个 NewTrans 类: + +```python + +@manager.TRANSFORMS.add_component +class NewTrans(nn.Layer): + def __init__(self, param1): + pass + def __call__(self, im, label=None): + + ... + + if label is None: + return (im, ) + else: + return (im, label) +``` + + +* **步骤 1**: 在 paddleseg/transforms/transforms.py 文件中定义 NewTrans 类。 + +* **步骤 2**: 在你的 transform 类的上方添加一个Python装饰器 ``@manager.TRANSFORMS.add_component``。这样就可以了。 +- 请注意,不再需要将该类导入到transforms的构造函数中了。在PaddleSeg中,transform组件把所有类都集成在一个文件里。你可以查看PaddleSeg/paddleseg/transforms/\_\_init\_\_.py,看看其文件内容与上文提到的 paddleseg/models/\_\_init\_\_.py 与 paddleseg/models/losses/\_\_init\_\_.py 的文件内容有何不同。 + +```python +from .transforms import * +from . import functional + +# 可以看到,PaddleSeg/paddleseg/transforms/\_\_init\_\_.py 文件中,以from .transforms import *导入所有已有的数据变换策略。 + +# 因此在你的自定义 transform 类写好后,在类对象创建过程中,它会被自动添加进来。 +``` + +* **步骤 3**: 在 yaml 文件中将 type 参数指定为你所创建的数据变换(数据增强)的名称: + +```yaml +train_dataset: + transforms: + - type: NewTrans # 你的自定义数据变换名称 + param1: ... 
+``` + +> 注意:为了更好的可读性,请在 paddleseg/transforms/functional.py 中实现详细的转换函数。 + + +## 创建自定义骨干网络 + + +> 如果你打算设计一个自定义骨干网络,例如在 new_backbone.py 中实现 NewBackbone 类(你可以为你的骨干网络起任何名字,但不要与已有骨干网络的名字重复): + +```python +import paddle.nn as nn +from paddleseg.cvlibs import manager + +@manager.BACKBONES.add_component +class NewBackbone(nn.Layer): + def __init__(self, param1): + pass + def forward(self, x): + pass +``` + + + +* **步骤 1**: 将 new_backbone.py 文件放置在目录 paddleseg/models/backbones 下。 + +* **步骤 2**: 在你的自定义骨干网络类的上方添加一个Python装饰器 ``@manager.BACKBONES.add_component``。 + +* **步骤 3**: 在 paddleseg/models/backbones/\_\_init\_\_.py 中导入你的自定义骨干网络类,如下所示: +```python +# 目前支持4种骨干网络 +from .hrnet import * +from .resnet_vd import * +from .xception_deeplab import * +from .mobilenetv3 import * # 以上导入请不要改动,否则会导致一些骨干网络不可用 + +from .new_backbone import NewBackbone # 请在这里添加你自己实现的骨干网络类 +``` + +* **步骤 4**: 在 yaml 文件中将 type 参数指定为你所创建的骨干网络的名称。 + +```yaml +model: + backbone: + type: NewBackbone # 你的自定义骨干网络名称 + param1: ... +``` + +## 创建自定义数据集 + + +> 如果你打算设计一个自定义数据集,例如在 new_data.py 中实现 NewData 类: + +```python +from paddleseg..dataset import Dataset +from paddleseg.cvlibs import manager + +@manager.DATASETS.add_component +class NewData(Dataset): + def __init__(self, + dataset_root=None, + transforms=None, + mode='train'): + pass +``` + + +* **步骤 1**: 将 new_data.py 文件放置在目录 paddleseg/datasets 下。 + +* **步骤 2**: 在你的自定义数据集类的上方添加一个Python装饰器 ``@manager.DATASETS.add_component``。 + +* **步骤 3**: 在 paddleseg/datasets/\_\_init\_\_.py 中导入你的自定义数据集类,如下所示: +```python +from .dataset import Dataset +from .cityscapes import Cityscapes +from .voc import PascalVOC +from .ade import ADE20K +from .optic_disc_seg import OpticDiscSeg +from .pascal_context import PascalContext +from .mini_deep_globe_road_extraction import MiniDeepGlobeRoadExtraction # 以上导入请不要改动,否则会导致一些数据集不可用 + +from .new_data import NewData # 请在这里添加你自己实现的数据集类 +``` +* **步骤 4**: 在 yaml 文件中将 type 参数指定为你所创建的数据集的名称。 + +```yaml +train_dataset: + type: NewData # 你的自定义数据集名称 + dataset_root: ... 
+ mode: train +``` + + +# 举例 +> 假设我们已经按照以上步骤编写好了五个自定义组件:MODELS(NewNet)、LOSSES(NewLoss)、TRANSFORMS(NewTrans)、BACKBONES(NewBackbone)、DATASETS(NewData)。 +> 假设我们要为自定义模型的训练编写 yaml 文件,以设定本次用到的参数。此处我们主要关心对自定义组件参数的配置,其他参数(如优化器)不过多介绍,建议沿用参考配置。如: +```yaml +batch_size: 4 # 设定迭代一次送入网络的图片数量。一般来说,你所使用机器的显存越大,可以调高batch_size的值。 + +iters: 10000 # 迭代次数 + +model: + type: NewNet # 自定义模型类的名称 + backbone: + type: NewBackbone # 自定义骨干网络类的名称 + pretrained: Null # 如果你实现训练过骨干网络的参数,请指定其存放路径 + num_classes: 5 # 标注图中像素类别个数。你的模型应该是根据具体的分割任务设计的,因此你知道该任务下像素类别个数 + pretrained: Null # 如果你有网络的与预训练参数,请指定其存放路径 + backbone_indices: [-1] + +loss: + types: + - type: NewLoss # 自定义损失函数类的名称 + coef: [1] # 若使用多种loss,该列表长度与loss数目保持一致 + + +train_dataset: + type: NewData # 自定义数据集类的名称 + dataset_root: data/custom_data # 请参考README文档,按其推荐的整理结构组织分割任务所需要的文件,将其放在相应的项目路径下。推荐放在data/下。 + transforms: + - type: NewTrans # 自定义数据转换(数据增强)类的名称 + custom_factor: 0.5 + mode: train # 对训练集设定训练模式 + + +val_dataset: + type: NewData + dataset_root: data/custom_data + transforms: + - type: Normalize + mode: val # 对验证集设定验证模式 + +optimizer: # 优化器设置 + type: sgd + momentum: 0.9 + weight_decay: 4.0e-5 + +lr_scheduler: # 学习率的设置 + type: PolynomialDecay + learning_rate: 0.01 + power: 0.9 + end_lr: 0 +``` + +假设我们将上面的 yaml 文件保存为 PaddleSeg/configs/custom_configs/NewNet_NewLoss_NewTrans_NewBackbone_NewData.yml,请先切换到PaddleSeg目录下后,运行以下命令: +``` +python train.py \ + --config configs/custom_configs/NewNet_NewLoss_NewTrans_NewBackbone_NewData.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` \ No newline at end of file diff --git a/docs/design/use/use.md b/docs/design/use/use.md index db369cdce4..01fa81cfdc 100644 --- a/docs/design/use/use.md +++ b/docs/design/use/use.md @@ -1,123 +1,132 @@ -# 配置项 +English | [简体中文](use_cn.md) +# Configuration item ---- ### train_dataset -> 训练数据集 +* Training dataset > -> * 参数 -> * type : 数据集类型,所支持值请参考训练配置文件 -> * **others** : 请参考对应模型训练配置文件 +> * Args +> * type : Dataset type, please refer to [Data Set Document](../../apis/datasets/datasets.md) for the supported values +> * **others** : Please refer to [Training Configuration File of Corresponding Model](../../../configs) ---- ### val_dataset -> 评估数据集 -> * 参数 -> * type : 数据集类型,所支持值请参考训练配置文件 -> * **others** : 请参考对应模型训练配置文件 +* Validation dataset +> * Args +> * type : Dataset type, please refer to [Data Set Document](../../apis/datasets/datasets.md) for the supported values +> * **others** : Please refer to [Training Configuration File of Corresponding Model](../../../configs) > ---- ### batch_size -> 单张卡上,每步迭代训练时的数据量 +* On a single card, the amount of data during each iteration of training. Generally speaking, the larger the video memory of the machine you are using, the larger the batch_size value. ---- ### iters -> 训练步数 +* The process of using a batch of data to update the parameters of the semantic segmentation model is called one training, that is, one iteration. Iters is the number of iterations in the training process. ---- ### optimizer +<<<<<<< HEAD > 训练优化器 > * 参数 > * type : 优化器类型,支持目前Paddle官方所有优化器 > * weight_decay : L2正则化的值 > * **others** : 请参考[Paddle官方Optimizer文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/Overview_cn.html) +======= +* Optimizer in training +> * Args +> * type : Optimizer type, currently only supports'sgd' and'adam' +> * momentum : Momentum optimization. +> * weight_decay : L2 regularized value. 
+>>>>>>> release/2.2 ---- ### lr_scheduler -> 学习率 -> * 参数 -> * type : 学习率类型,支持10种策略,分别是'PolynomialDecay', 'PiecewiseDecay', 'StepDecay', 'CosineAnnealingDecay', 'ExponentialDecay', 'InverseTimeDecay', 'LinearWarmup', 'MultiStepDecay', 'NaturalExpDecay', 'NoamDecay'. -> * **others** : 请参考[Paddle官方LRScheduler文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/lr/LRScheduler_cn.html) +* Learning rate +> * Args +> * type : Learning rate type, supports 12 strategies: 'PolynomialDecay', 'PiecewiseDecay', 'StepDecay', 'CosineAnnealingDecay', 'ExponentialDecay', 'InverseTimeDecay', 'LinearWarmup', 'MultiStepDecay', 'NaturalExpDecay', 'NoamDecay', ReduceOnPlateau, LambdaDecay. +> * **others** : Please refer to [Paddle official LRScheduler document](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/lr/LRScheduler_cn.html) ---- -### learning_rate(不推荐使用该配置,将来会被废弃,建议使用`lr_scheduler`代替) -> 学习率 -> * 参数 -> * value : 初始学习率 -> * decay : 衰减配置 -> * type : 衰减类型,目前只支持poly -> * power : 衰减率 -> * end_lr : 最终学习率 +### learning_rate(This configuration is not recommended and will be obsolete in the future. It is recommended to use `lr_scheduler` instead) +* Learning rate +> * Args +> * value : Initial learning rate. +> * decay : Attenuation configuration. +> * type : Attenuation type, currently only supports poly. +> * power : Attenuation rate. +> * end_lr : Final learning rate. ---- ### loss -> 损失函数 -> * 参数 -> * types : 损失函数列表 -> * type : 损失函数类型,所支持值请参考损失函数库 -> * coef : 对应损失函数列表的系数列表 +* Loss function +> * Args +> * types : List of loss functions. +> * type : Loss function type, please refer to the loss function library for the supported values. +> * coef : List of coefficients corresponding to the loss function list. ---- ### model -> 待训练模型 -> * 参数 -> * type : 模型类型,所支持值请参考模型库 -> * **others** : 请参考对应模型训练配置文件 +* Model to be trained +> * Args +> * type : Model type, please refer to [Model Library](../../apis/models/models.md) for the supported values +> * **others** : Please refer to [Training Configuration File of Corresponding Model](../../../configs) --- ### export -> 模型导出配置 -> * 参数 -> * transforms : 预测时的预处理操作,支持配置的transforms与`train_dataset`、`val_dataset`等相同。如果不填写该项,默认只会对数据进行归一化标准化操作。 +* Model export configuration +> * Args +> * transforms : The preprocessing operation during prediction, the supported transforms are the same as `train_dataset`, `val_dataset`, etc. If you do not fill in this item, only the data will be normalized by default. -# 示例 +# Example ```yaml -batch_size: 4 -iters: 80000 - -train_dataset: - type: Cityscapes - dataset_root: data/cityscapes - transforms: - - type: ResizeStepScaling - min_scale_factor: 0.5 +batch_size: 4 # Set the number of pictures sent to the network at one iteration. Generally speaking, the larger the video memory of the machine you are using, the higher the batch_size value. 
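+# Note: batch_size here is the number of samples per GPU card, so with multi-GPU training
+# the total number of samples consumed in one iteration is batch_size multiplied by the
+# number of cards.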
+iters: 80000 # Number of iterations + +train_dataset: # Training dataset + type: Cityscapes # The name of the training dataset class + dataset_root: data/cityscapes # The directory where the training dataset is stored + transforms: # Data transformation and data augmentation + - type: ResizeStepScaling # The image is scaled according to a certain ratio, and this ratio takes scale_step_size as the step size + min_scale_factor: 0.5 # Parameters involved in the scaling process max_scale_factor: 2.0 scale_step_size: 0.25 - - type: RandomPaddingCrop + - type: RandomPaddingCrop # Random cropping of images and annotations crop_size: [1024, 512] - - type: RandomHorizontalFlip - - type: Normalize - mode: train + - type: RandomHorizontalFlip # Flip the image horizontally with a certain probability + - type: Normalize # Normalize the image + mode: train # Training mode -val_dataset: - type: Cityscapes - dataset_root: data/cityscapes +val_dataset: # Validation dataset + type: Cityscapes # The name of the validating dataset class + dataset_root: data/cityscapes # The directory where the validating dataset is stored transforms: - - type: Normalize - mode: val + - type: Normalize # Normalize the image + mode: val # Validating mode -optimizer: - type: sgd +optimizer: # Which optimizer to use + type: sgd # Stochastic gradient descent momentum: 0.9 weight_decay: 4.0e-5 -lr_scheduler: - type: PolynomialDecay +lr_scheduler: # Related settings for learning rate + type: PolynomialDecay # A type of learning rate,a total of 12 strategies are supported learning_rate: 0.01 power: 0.9 end_lr: 0 -loss: +loss: # What loss function to use types: - - type: CrossEntropyLoss - coef: [1] + - type: CrossEntropyLoss # Cross entropy loss function + coef: [1] # When multiple loss functions are used, the ratio of each loss can be specified in coef -model: +model: # Which semantic segmentation model to use type: FCN - backbone: + backbone: # What kind of backbone network to use type: HRNet_W18 - pretrained: pretrained_model/hrnet_w18_ssld - num_classes: 19 + pretrained: pretrained_model/hrnet_w18_ssld # Specify the storage path of the pre-trained model + num_classes: 19 # Number of pixel categories pretrained: Null backbone_indices: [-1] diff --git a/docs/design/use/use_cn.md b/docs/design/use/use_cn.md new file mode 100644 index 0000000000..3bef59ff6e --- /dev/null +++ b/docs/design/use/use_cn.md @@ -0,0 +1,125 @@ +简体中文 | [English](use.md) +# 配置项 + +---- +### train_dataset +* 训练数据集 +> +> * 参数 +> * type : 数据集类型,所支持值请参考[数据集文档](../../apis/datasets/datasets_cn.md) +> * **others** : 请参考[对应模型的训练配置文件](../../../configs) + +---- +### val_dataset +* 验证数据集 +> * 参数 +> * type : 数据集类型,所支持值请参考[数据集文档](../../apis/datasets/datasets_cn.md) +> * **others** : 请参考[对应模型的训练配置文件](../../../configs) +> + +---- +### batch_size +* 单张卡上,每步迭代训练时的数据量。一般来说,你所使用机器的显存越大,可以相应的调高batch_size的值。 + +---- +### iters +* 使用一个 batch 数据对语义分割模型进行一次参数更新的过程称之为一次训练,即一次迭代。iters 即为训练过程中的迭代次数。 + +---- +### optimizer +* 训练优化器 +> * 参数 +> * type : 优化器类型,目前只支持'sgd'和'adam' +> * momentum : 动量优化法 +> * weight_decay : L2正则化的值 + +---- +### lr_scheduler +* 学习率 +> * 参数 +> * type : 学习率类型,支持12种策略,分别是'PolynomialDecay', 'PiecewiseDecay', 'StepDecay', 'CosineAnnealingDecay', 'ExponentialDecay', 'InverseTimeDecay', 'LinearWarmup', 'MultiStepDecay', 'NaturalExpDecay', 'NoamDecay', ReduceOnPlateau, LambdaDecay. 
+> * **others** : 请参考[Paddle官方LRScheduler文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/lr/LRScheduler_cn.html) + +---- +### learning_rate(不推荐使用该配置,将来会被废弃,建议使用`lr_scheduler`代替) +* 学习率 +> * 参数 +> * value : 初始学习率 +> * decay : 衰减配置 +> * type : 衰减类型,目前只支持poly +> * power : 衰减率 +> * end_lr : 最终学习率 + +---- +### loss +* 损失函数 +> * 参数 +> * types : 损失函数列表 +> * type : 损失函数类型,所支持值请参考损失函数库 +> * coef : 对应损失函数列表的系数列表 + +---- +### model +* 待训练模型 +> * 参数 +> * type : 模型类型,所支持值请参考[模型库](../../apis/models/models_cn.md) +> * **others** : 请参考[对应模型的训练配置文件](../../../configs) +--- +### export +* 模型导出配置 +> * 参数 +> * transforms : 预测时的预处理操作,支持配置的transforms与`train_dataset`、`val_dataset`等相同。如果不填写该项,默认只会对数据进行归一化标准化操作。 + +# 示例 + +```yaml +batch_size: 4 # 设定迭代一次送入网络的图片数量。一般来说,你所使用机器的显存越大,可以调高batch_size的值。 +iters: 80000 # 迭代次数 + +train_dataset: # 训练数据集 + type: Cityscapes # 训练数据集类的名称 + dataset_root: data/cityscapes # 训练数据集存放的目录 + transforms: # 数据变换与数据增强 + - type: ResizeStepScaling # 对图像按照某一个比例进行缩放,这个比例以scale_step_size为步长 + min_scale_factor: 0.5 # 缩放过程中涉及的参数 + max_scale_factor: 2.0 + scale_step_size: 0.25 + - type: RandomPaddingCrop # 对图像和标注图进行随机裁剪 + crop_size: [1024, 512] + - type: RandomHorizontalFlip # 以一定的概率对图像进行水平翻转 + - type: Normalize # 对图像进行标准化 + mode: train # 训练模式 + +val_dataset: # 验证数据集 + type: Cityscapes # 验证数据集类的名称。 + dataset_root: data/cityscapes # 验证数据集存放的目录 + transforms: + - type: Normalize # 对图像进行标准化 + mode: val # 验证模式 + +optimizer: # 使用何种优化器 + type: sgd # 随机梯度下降 + momentum: 0.9 + weight_decay: 4.0e-5 + +lr_scheduler: # 学习率的相关设置 + type: PolynomialDecay # 一种学习率类型。共支持12种策略 + learning_rate: 0.01 + power: 0.9 + end_lr: 0 + +loss: # 使用何种损失函数 + types: + - type: CrossEntropyLoss # 交叉熵损失函数 + coef: [1] #当使用了多种损失函数,可在 coef 中为每种损失指定配比 + +model: # 使用何种语义分割模型 + type: FCN + backbone: # 使用何种骨干网络 + type: HRNet_W18 + pretrained: pretrained_model/hrnet_w18_ssld #指定预训练模型的存储路径 + num_classes: 19 # 像素类别数 + pretrained: Null + backbone_indices: [-1] + +``` diff --git a/docs/evaluation/evaluate/evaluate.md b/docs/evaluation/evaluate/evaluate.md index 9794f10789..cd846f94db 100644 --- a/docs/evaluation/evaluate/evaluate.md +++ b/docs/evaluation/evaluate/evaluate.md @@ -1,10 +1,11 @@ -## 模型评估 +English|[简体中文](evaluate_cn.md) +## Model Evaluating -### 1.**配置化驱动**方式下评估和预测 +### 1. Evaluation and Prediction under **Configuration-Driven** Approach -#### 评估 +#### Evaluating -训练完成后,用户可以使用评估脚本val.py来评估模型效果。假设训练过程中迭代次数(iters)为1000,保存模型的间隔为500,即每迭代1000次数据集保存2次训练模型。因此一共会产生2个定期保存的模型,加上保存的最佳模型best_model,一共有3个模型,可以通过model_path指定期望评估的模型文件。 +After the training, the user can use the evaluation script val.py to evaluate the effect of the model. Assuming that the number of iterations (iters) in the training process is 1000, the interval for saving the model is 500, that is, the training model is saved twice for every 1000 iterations of the data set. Therefore, a total of 2 regularly saved models will be generated, plus the best saved model `best_model`, there are a total of 3 models, and the model file that you want to evaluate can be specified by `model_path`. ``` !python val.py \ @@ -12,7 +13,7 @@ --model_path output/iter_1000/model.pdparams ``` -如果想进行多尺度翻转评估可通过传入`--aug_eval`进行开启,然后通过`--scales`传入尺度信息, `--flip_horizontal`开启水平翻转, `flip_vertical`开启垂直翻转。使用示例如下: +If you want to perform multi-scale flip evaluation, you can turn it on by passing in `--aug_eval`, and then passing in scale information via `--scales`, `--flip_horizontal` turns on horizontal flip, and `flip_vertical` turns on vertical flip. 
Examples are as follows: ``` python val.py \ @@ -23,7 +24,7 @@ python val.py \ --flip_horizontal ``` -如果想进行滑窗评估可通过传入`--is_slide`进行开启, 通过`--crop_size`传入窗口大小, `--stride`传入步长。使用示例如下: +If you want to perform sliding window evaluation, you can open it by passing in `--is_slide`, pass in the window size by `--crop_size`, and pass in the step size by `--stride`. Examples of usage are as follows: ``` python val.py \ @@ -34,15 +35,15 @@ python val.py \ --stride 128 128 ``` -在图像分割领域中,评估模型质量主要是通过三个指标进行判断,准确率(acc)、平均交并比(Mean Intersection over Union,简称mIoU)、Kappa系数。 +In the image segmentation, evaluating model quality is mainly judged by three indicators, `accuracy` (acc), `mean intersection over union` (mIoU), and `Kappa coefficient`. -- 准确率:指类别预测正确的像素占总像素的比例,准确率越高模型质量越好。 -- 平均交并比:对每个类别数据集单独进行推理计算,计算出的预测区域和实际区域交集除以预测区域和实际区域的并集,然后将所有类别得到的结果取平均。在本例中,正常情况下模型在验证集上的mIoU指标值会达到0.80以上,显示信息示例如下所示,第3行的**mIoU=0.8526**即为mIoU。 -- Kappa系数:一个用于一致性检验的指标,可以用于衡量分类的效果。kappa系数的计算是基于混淆矩阵的,取值为-1到1之间,通常大于0。其公式如下所示,P0P_0*P*0为分类器的准确率,PeP_e*P**e*为随机分类器的准确率。Kappa系数越高模型质量越好。 +- **Accuracy**: refers to the proportion of pixels with correct category prediction to the total pixels. The higher the accuracy, the better the quality of the model. +- **Average intersection ratio**: perform inference calculations for each category dataset separately, divide the calculated intersection of the predicted area and the actual area by the union of the predicted area and the actual area, and then average the results of all categories. In this example, under normal circumstances, the mIoU index value of the model on the verification set will reach 0.80 or more. An example of the displayed information is shown below. The **mIoU=0.8526** in the third row is mIoU. +- **Kappa coefficient**: an index used for consistency testing, which can be used to measure the effect of classification. The calculation of the kappa coefficient is based on the confusion matrix, with a value between -1 and 1, usually greater than 0. The formula is as follows, P0P_0*P*0 is the accuracy of the classifier, and PeP_e*P**e* is the accuracy of the random classifier. The higher the Kappa coefficient, the better the model quality. -Kappa=P0−Pe1−PeKappa= \frac{P_0-P_e}{1-P_e}*K**a**p**p**a*=1−*P**e**P*0−*P**e* + -随着评估脚本的运行,最终打印的评估日志如下。 +With the running of the evaluation script, the final printed evaluation log is as follows. ``` ... 
@@ -55,29 +56,11 @@ Kappa=P0−Pe1−PeKappa= \frac{P_0-P_e}{1-P_e}*K**a**p**p**a*=1−*P**e**P*0− [0.9959 0.8886] ``` -#### 预测 +### 2.Evaluation and Prediction under **API** Approach -除了分析模型的IOU、ACC和Kappa指标之外,我们还可以查阅一些具体样本的切割样本效果,从Bad Case启发进一步优化的思路。 +#### Evaluating -predict.py脚本是专门用来可视化预测案例的,命令格式如下所示 - -``` -!python predict.py \ - --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ - --model_path output/iter_1000/model.pdparams \ - --image_path dataset/optic_disc_seg/JPEGImages/H0003.jpg \ - --save_dir output/result -``` - -其中`image_path`也可以是一个目录,这时候将对目录内的所有图片进行预测并保存可视化结果图。 - -同样的,可以通过`--aug_pred`开启多尺度翻转预测, `--is_slide`开启滑窗预测。 - -### 2.**API**方式下评估和预测 - -#### 评估 - -构建模型 +Construct Model ``` from paddleseg.models import BiSeNetV2 model = BiSeNetV2(num_classes=2, @@ -86,29 +69,29 @@ model = BiSeNetV2(num_classes=2, pretrained=None) ``` -加载模型参数 +Load Model Parameters ``` -model_path = 'output/best_model/model.pdparams'#最优模型路径 +model_path = 'output/best_model/model.pdparams'# Path of best model if model_path: para_state_dict = paddle.load(model_path) - model.set_dict(para_state_dict) #加载模型参数 + model.set_dict(para_state_dict) # Load parameters print('Loaded trained params of model successfully') else: raise ValueError('The model_path is wrong: {}'.format(model_path)) ``` -构建验证集 +Construct Validation Dataset ``` -# 构建验证用的transforms +# Define transforms for verification import paddleseg.transforms as T transforms = [ T.Resize(target_size=(512, 512)), T.Normalize() ] -# 构建验证集 +# Construct validation dataset from paddleseg.datasets import OpticDiscSeg val_dataset = OpticDiscSeg( dataset_root='data/optic_disc_seg', @@ -116,8 +99,8 @@ val_dataset = OpticDiscSeg( mode='val' ) ``` +**Evaluate** API Parameter Analysis -评估 **evaluate** API,参数解析 ``` paddleseg.core.evaluate( @@ -134,22 +117,27 @@ paddleseg.core.evaluate( ) ``` -- 参数说明如下 +- Parameters -| 参数名 | 数据类型 | 用途 | 是否必选项 | 默认值 | +| Parameter | Types | Effection | Is Required | Default | | --------------- | ----------------- | ---------------------------------------------------- | ---------- | ------ | -| model | nn.Layer | 分割模型 | 是 | - | -| eval_dataset | paddle.io.Dataset | 验证集DataSet | 是 | - | -| aug_eval | bool | 是否使用数据增强 | 否 | False | -| scales | list/float | 多尺度评估,aug_eval为True时生效 | 否 | 1.0 | -| flip_horizontal | bool | 是否使用水平翻转,aug_eval为True时生效 | 否 | True | -| flip_vertical | bool | 是否使用垂直翻转,aug_eval为True时生效 | 否 | False | -| is_slide | bool | 是否通过滑动窗口进行评估 | 否 | False | -| stride | tuple/list | 设置滑动窗宽的宽度和高度,is_slide为True时生效 | 否 | None | -| crop_size | tuple/list | 设置滑动窗口的裁剪的宽度和高度,is_slide为True时生效 | 否 | None | -| num_workers | int | 多线程数据加载 | 否 | 0 | +| model | nn.Layer | Segmentation model | Yes | - | +| eval_dataset | paddle.io.Dataset | Validation dataSet | Yes | - | +| aug_eval | bool | Whether to use data augmentation | No | False | +| scales | list/float | Set the zoom factor, take effect when aug_pred is True | No | 1.0 | +| flip_horizontal | bool | Whether to use horizontal flip, take effect when `aug_eval` is True | No | True | +| flip_vertical | bool | Whether to use vertical flip, take effect when `aug_eval` is True | No | False | +| is_slide | bool | Whether to evaluate through a sliding window | No | False | +| stride | tuple/list | Set the width and height of the sliding window, effective when `is_slide` is True | No | None | +| crop_size | tuple/list | Set the width and height of the crop of the sliding window, which takes effect when `is_slide` is True | No | None | +| num_workers | int | Multi-threaded data loading | No 
| 0 | + + + +**Note** If you want to improve the memory utilization, you can increase the setting of num_workers appropriately to prevent the GPU from waiting during work. + -导入**evaluate** API接口,开始评估 +Import the API interface and start the evaluation ``` from paddleseg.core import evaluate @@ -159,7 +147,7 @@ evaluate( ) ``` -多尺度+翻转评估 +Multi-scale , flip evaluation ``` evaluate( @@ -169,57 +157,3 @@ evaluate( scales=[0.75, 1.0, 1.25], #缩放因子 flip_horizontal=True) #是否水平翻转 ``` - - -#### 预测 -预测 **predict** API,参数解析 - -``` -paddleseg.core.predict( - model, - model_path, - transforms, - image_list, - image_dir=None, - save_dir='output', - aug_pred=False, - scales=1.0, - flip_horizontal=True, - flip_vertical=False, - is_slide=False, - stride=None, - crop_size=None -) -``` - -- 参数说明如下 - -| 参数名 | 数据类型 | 用途 | 是否必选项 | 默认值 | -| --------------- | ----------------- | ---------------------------------------------------- | ---------- | -------- | -| model | nn.Layer | 分割模型 | 是 | - | -| model_path | str | 训练最优模型的路径 | 是 | - | -| transforms | transform.Compose | 对输入图像进行预处理 | 是 | - | -| image_list | list | 待预测的图像路径列表 | 是 | - | -| image_dir | str | 待要预测的图像路径目录 | 否 | None | -| save_dir | str | 结果输出路径 | 否 | 'output' | -| aug_pred | bool | 是否使用多尺度和翻转增广进行预测 | 否 | False | -| scales | list/float | 设置缩放因子,aug_pred为True时生效 | 否 | 1.0 | -| flip_horizontal | bool | 是否使用水平翻转,aug_eval为True时生效 | 否 | True | -| flip_vertical | bool | 是否使用垂直翻转,aug_eval为True时生效 | 否 | False | -| is_slide | bool | 是否通过滑动窗口进行评估 | 否 | False | -| stride | tuple/list | 设置滑动窗宽的宽度和高度,is_slide为True时生效 | 否 | None | -| crop_size | tuple/list | 设置滑动窗口的裁剪的宽度和高度,is_slide为True时生效 | 否 | None | - -导入**predict** API接口,开始预测 - -``` -from paddleseg.core import predict -predict( - model, - model_path='output/best_model/model.pdparams',# 模型路径 - transforms=transforms, #transform.Compose, 对输入图像进行预处理 - image_list=image_list, #list,待预测的图像路径列表。 - image_dir=image_dir, #str,待预测的图片所在目录 - save_dir='output/results' #str,结果输出路径 - ) -``` diff --git a/docs/evaluation/evaluate/evaluate_cn.md b/docs/evaluation/evaluate/evaluate_cn.md new file mode 100644 index 0000000000..2b7f9f6be7 --- /dev/null +++ b/docs/evaluation/evaluate/evaluate_cn.md @@ -0,0 +1,157 @@ +简体中文|[English](evaluate.md) +## 模型评估 + +### 1.**配置化驱动**方式下评估和预测 + +#### 评估 + +训练完成后,用户可以使用评估脚本val.py来评估模型效果。假设训练过程中迭代次数(iters)为1000,保存模型的间隔为500,即每迭代1000次数据集保存2次训练模型。因此一共会产生2个定期保存的模型,加上保存的最佳模型`best_model`,一共有3个模型,可以通过`model_path`指定期望评估的模型文件。 + +``` +!python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams +``` + +如果想进行多尺度翻转评估,可通过传入`--aug_eval`进行开启,然后通过`--scales`传入尺度信息, `--flip_horizontal`开启水平翻转, `flip_vertical`开启垂直翻转。使用示例如下: + +``` +python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --aug_eval \ + --scales 0.75 1.0 1.25 \ + --flip_horizontal +``` + +如果想进行滑窗评估,可通过传入`--is_slide`进行开启, 通过`--crop_size`传入窗口大小, `--stride`传入步长。使用示例如下: + +``` +python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --is_slide \ + --crop_size 256 256 \ + --stride 128 128 +``` + +在图像分割领域中,评估模型质量主要是通过三个指标进行判断,准确率(acc)、平均交并比(Mean Intersection over Union,简称mIoU)、Kappa系数。 + +- 准确率:指类别预测正确的像素占总像素的比例,准确率越高模型质量越好。 +- 平均交并比:对每个类别数据集单独进行推理计算,计算出的预测区域和实际区域交集除以预测区域和实际区域的并集,然后将所有类别得到的结果取平均。在本例中,正常情况下模型在验证集上的mIoU指标值会达到0.80以上,显示信息示例如下所示,第3行的**mIoU=0.8526**即为mIoU。 +- 
Kappa系数:一个用于一致性检验的指标,可以用于衡量分类的效果。kappa系数的计算是基于混淆矩阵的,取值为-1到1之间,通常大于0。其公式如下所示,P0P_0*P*0为分类器的准确率,PeP_e*P**e*为随机分类器的准确率。Kappa系数越高模型质量越好。 + + + +随着评估脚本的运行,最终打印的评估日志如下。 + +``` +... +2021-01-13 16:41:29 [INFO] Start evaluating (total_samples=76, total_iters=76)... +76/76 [==============================] - 2s 30ms/step - batch_cost: 0.0268 - reader cost: 1.7656e- +2021-01-13 16:41:31 [INFO] [EVAL] #Images=76 mIoU=0.8526 Acc=0.9942 Kappa=0.8283 +2021-01-13 16:41:31 [INFO] [EVAL] Class IoU: +[0.9941 0.7112] +2021-01-13 16:41:31 [INFO] [EVAL] Class Acc: +[0.9959 0.8886] +``` + +### 2.**API**方式下评估和预测 + +#### 评估 + +构建模型 +``` +from paddleseg.models import BiSeNetV2 +model = BiSeNetV2(num_classes=2, + lambd=0.25, + align_corners=False, + pretrained=None) +``` + +加载模型参数 + +``` +model_path = 'output/best_model/model.pdparams'#最优模型路径 +if model_path: + para_state_dict = paddle.load(model_path) + model.set_dict(para_state_dict) #加载模型参数 + print('Loaded trained params of model successfully') +else: + raise ValueError('The model_path is wrong: {}'.format(model_path)) +``` + +构建验证集 + +``` +# 构建验证用的transforms +import paddleseg.transforms as T +transforms = [ + T.Resize(target_size=(512, 512)), + T.Normalize() +] + +# 构建验证集 +from paddleseg.datasets import OpticDiscSeg +val_dataset = OpticDiscSeg( + dataset_root='data/optic_disc_seg', + transforms=transforms, + mode='val' +) +``` + +**Evaluate** API 参数解析 + +``` +paddleseg.core.evaluate( + model, + eval_dataset, + aug_eval=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + num_workers=0 +) +``` + +- 参数说明如下 + +| 参数名 | 数据类型 | 用途 | 是否必选项 | 默认值 | +| --------------- | ----------------- | ---------------------------------------------------- | ---------- | ------ | +| model | nn.Layer | 分割模型 | 是 | - | +| eval_dataset | paddle.io.Dataset | 验证集DataSet | 是 | - | +| aug_eval | bool | 是否使用数据增强 | 否 | False | +| scales | list/float | 多尺度评估,aug_eval为True时生效 | 否 | 1.0 | +| flip_horizontal | bool | 是否使用水平翻转,aug_eval为True时生效 | 否 | True | +| flip_vertical | bool | 是否使用垂直翻转,aug_eval为True时生效 | 否 | False | +| is_slide | bool | 是否通过滑动窗口进行评估 | 否 | False | +| stride | tuple/list | 设置滑动窗宽的宽度和高度,is_slide为True时生效 | 否 | None | +| crop_size | tuple/list | 设置滑动窗口的裁剪的宽度和高度,is_slide为True时生效 | 否 | None | +| num_workers | int | 多线程数据加载 | 否 | 0 | + +**注意** 如果你想提升显存利用率,可以适当的提高 num_workers 的设置,以防GPU工作期间空等。 + + +导入API接口,开始评估 + +``` +from paddleseg.core import evaluate +evaluate( + model, + val_dataset #paddle.io.Dataset,验证集DataSet +) +``` + +多尺度+翻转评估 + +``` +evaluate( + model, + val_dataset, + aug_eval=True, #是否使用数据增强 + scales=[0.75, 1.0, 1.25], #缩放因子 + flip_horizontal=True) #是否水平翻转 +``` diff --git a/docs/faq/faq/faq.md b/docs/faq/faq/faq.md index a3896617bd..d063ef2634 100644 --- a/docs/faq/faq/faq.md +++ b/docs/faq/faq/faq.md @@ -1,2 +1,99 @@ +English | [简体中文](faq_cn.md) # FAQ -coming soon + +## Q1: How to load the weight parameters of the pre-trained model locally? + +* **Answer**: + +> The recommended configuration parameters of the model are stored in the yaml file of each model folder under PaddleSeg/configs. For example, one of the configurations of ANN is given in /PaddleSeg/configs/ann/ann_resnet50_os8_cityscapes_1024x512_80k.yml. As shown below: + + +![](./faq_imgs/ann_config.png) + + +> The red part in the figure is the storage location of the pre-training model parameter file of the backbone network. **Note**: Here, we will download the pre-training model parameters provided by us directly in the form of a https link. 
If you have the pre-trained model parameters of the backbone network locally, please replace the `pretrained` under `backbone` in the yaml file with the absolute path. Or, you should set the relative path for the storage location of the pre-training model parameters according to the directory where the `train.py` will be executed. + +> The green part in the figure is the storage location of the pre-training model parameter file of the segmentation network. If you have the pre-trained model parameters of the segmentation network locally, please replace the `pretrained` in the yaml file with the absolute path or relative path where it is stored. + + + +## Q2: Why PaddleSeg do not setting epoch? +* **Answer**: + +> The way to set the `epoch` will be affected by the size of the dataset. So we support set `iters`. + +> **Supplement:** The relationship between training configuration parameters + +- Define the parameters as follows: + - 1. Dataset size: N + - 2. Batch size: batch_size + - 3. Number of GPUs: num_gpus + - 4. Total number of iterations: iters + +- Then: + - epoch = (iters * batch_size * num_gpus) / N + + + +## Q3: What is the loading sequence of the data augmentation configuration? +* **Answer**: + +> Since the configuration of data augmentation is specified in the yaml file, it's important to introduce the basic knowledge of the configuration file in PaddleSeg. + +> Cityscapes is one of the most commonly used datasets in the field of semantic segmentation, so some common configurations on Cityscapes have been given. + +> PaddleSeg uses `_base_` to specify the inheritance relationship between configurations: + + +```yaml +_base_: '../_base_/cityscapes.yml' +_base_: '../_base_/cityscapes_1024x1024.yml' +``` + + +- Instruction: + - 1. Data augmentation is specified by `transforms` and loaded sequentially from top to bottom. + - 2. The subclass overrides the configuration of the same name in its parent class. + - 3. The command line (such as `--batch_size 4`) overwrites the configuration with the same name inside --config (such as `batch_size: 8` specified in yaml). + + +## Q4: Why does the data augmentation configuration cause a DataLoader reader thread error? +* **Answer**: + +> If you are using a custom dataset with inconsistent shapes, this may be an error caused by improper data augmentation loading sequence. + +> In Q3, we already know that PaddleSeg's data augmentation configuration is loaded in order. + +> For example, `RandomRotation` will change the size of the image. If it is set after other augmentations to correct the size (such as Crop, Resize), the image size will be inconsistent. This causes a DataLoader reader thread error. + +> Therefore, before starting training, please refer to Q3 to check the data augmentation configuration sequence. + + +## Q5: What is the current SOTA model of PaddleSeg on CityScapes? +* **Answer**: + +> The current SOTA model on CityScapes can reach 87% mIoU. + +> Please refer: https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/CityscapesSOTA + + +## Q6: Why is best_model not saved during training? +* **Answer**: + +> The `best_model` is obtained by evaluation and comparison during the training process. + +> Therefore, you need to enable the option `--do_eval` before training. + + +## Q7: After resuming training, why does vdl only visualize the second half? How to visualize the interrupted half? + +> Due to some reasons, the model may not be trained at one time. 
+A relatively simple method is: copy the contents of the first generated log and the second generated log to a new binary file, then read it. + +> We will support multiple log merging in the new version in the near future. + +> In addition, if it is similar to the situation of continuing training after interruption, you can specify the log name when calling visualdl, so that you can continue to write directly in the specified log file. + +> Please refer: https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/faq_CN.md#%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E5%B7%B2%E6%9C%89%E7%9A%84%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6 + + diff --git a/docs/faq/faq/faq_cn.md b/docs/faq/faq/faq_cn.md new file mode 100644 index 0000000000..08cdaa00cf --- /dev/null +++ b/docs/faq/faq/faq_cn.md @@ -0,0 +1,94 @@ +简体中文 | [English](faq.md) +# FAQ + +## Q1: PaddleSeg 如何从本地加载预训练模型的权重参数? + +* **Answer**: + +> PaddleSeg 模型的推荐配置参数统一存放在 PaddleSeg/configs 下各个模型文件夹的 yaml 文件中。比如 ANN 的其中一个配置在 /PaddleSeg/configs/ann/ann_resnet50_os8_cityscapes_1024x512_80k.yml 中给出。如下图所示: + +![](./faq_imgs/ann_config.png) + + +> 图中红色部分为骨干网络的预训练模型参数文件的存放位置。**请注意**:此处将直接以 https 链接形式下载我们提供的预训练模型参数。如果你在本地拥有骨干网络的预训练模型参数,请用其存放的绝对路径替换该 yaml 文件中 `backbone` 下的 `pretrained`。或者,你可以根据将要执行的 `train.py` 所在的目录为该预训练模型参数的存放位置设置相对路径。 + +> 图中绿色部分为分割网络的预训练模型参数文件的存放位置。如果你在本地拥有分割网络的预训练模型参数,请用其存放的绝对路径或相对路径替换该 yaml 文件中的 `pretrained`。 + + +## Q2: 为什么PaddleSeg不采用设置epoch的方式? +* **Answer**: + +> 设置 `epoch` 的方式会受数据集大小的影响。因此PaddleSeg 按照 `iters` 进行设置。 + +> **补充:** 常见训练配置参数之间的关系 + +- 定义参数如下: + - 1. 数据集大小: N + - 2. 批量大小: batch_size + - 3. GPU数量: num_gpus + - 4. 总迭代次数: iters + +- 则有: + - epoch = (iters * batch_size * num_gpus) / N + + + +## Q3: 数据增强配置的加载顺序是怎样的? +* **Answer**: + +> 由于数据增强的配置要在yaml文件中进行指定,先介绍一下PaddleSeg中配置文件的基本知识。 + +> Cityscapes是图像分割领域最常使用的数据集之一,因此Cityscapes上的一些常用配置已经给出。 + +> PaddleSeg以 `_base_` 指定配置之间的继承关系: + + +```yaml +_base_: '../_base_/cityscapes.yml' +_base_: '../_base_/cityscapes_1024x1024.yml' +``` + + +- 说明: + - 1. 数据增强以 `transforms` 所指定,由上到下依次加载。 + - 2. 子类覆盖父类中的同名配置。 + - 3. 命令行(如 `--batch_size 4`)覆盖 --config 内部的同名配置(如yaml中指定的`batch_size: 8`)。 + + +## Q4: 数据增强配置为何会引起 DataLoader reader thread 错误? +* **Answer**: + +> 如果你使用的是shape各不一致的自定义数据集,这可能是由于不得当的数据增强加载顺序引起的错误。 + +> 在Q3中,我们已经知道PaddleSeg的数据增强配置是按顺序加载的。 + +> 例如,`RandomRotation` 会改变图片大小,如果在其他修正尺寸的增强配置之后设置它(如Crop、Resize),将导致图像尺寸的不一致。从而引发 DataLoader reader thread 错误。 + +> 因此,在开启训练之前,请参照Q3,仔细检查数据增强配置顺序。 + + +## Q5: 目前PaddleSeg在CityScapes上SOTA的模型是什么? +* **Answer**: + +> 目前在CityScapes上SOTA的模型可达到87%的mIoU。 + +> 详见: https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/CityscapesSOTA + + +## Q6: 为什么训练过程中不保存best_model? +* **Answer**: +> best_model是在训练过程中验证对比得到的。因此需要在训练前开启选项 `--do_eval`。 + + +## Q7: 恢复训练后,为什么vdl仅仅可视化了后半部分?如何对训练中断之前进行可视化? 
+ +> 由于算力限制、各种不可抗力,模型可能未能一次训练完毕。 +一个比较简单的办法是:把第一次生成的日志和第二次生成的日志中的内容拷贝到一个新的二进制文件中,然后读取。 + +> 我们将会在不久的将来在新版本中支持多个日志合并。 + +> 另外,如果是类似这种中断后继续训练的情况,可以在调用visualdl的时候指定日志名,这样就可以直接在指定的日志文件中继续续写了。 + +> 资料见:https://github.com/PaddlePaddle/VisualDL/blob/develop/docs/faq_CN.md#%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E5%B7%B2%E6%9C%89%E7%9A%84%E6%97%A5%E5%BF%97%E6%96%87%E4%BB%B6 + + diff --git a/docs/faq/faq/faq_imgs/ann_config.png b/docs/faq/faq/faq_imgs/ann_config.png new file mode 100644 index 0000000000..fd00a618b7 Binary files /dev/null and b/docs/faq/faq/faq_imgs/ann_config.png differ diff --git a/docs/images/QQ20210309-213137.png b/docs/images/QQ20210309-213137.png deleted file mode 100644 index 6c835abc75..0000000000 Binary files a/docs/images/QQ20210309-213137.png and /dev/null differ diff --git a/docs/images/cityscapes.gif b/docs/images/cityscapes.gif deleted file mode 100644 index b8faa4711b..0000000000 Binary files a/docs/images/cityscapes.gif and /dev/null differ diff --git a/docs/images/eiseg_demo.gif b/docs/images/eiseg_demo.gif new file mode 100644 index 0000000000..79f33db69a Binary files /dev/null and b/docs/images/eiseg_demo.gif differ diff --git "a/docs/images/readme/\344\272\214\346\254\241\345\205\203.gif" "b/docs/images/readme/\344\272\214\346\254\241\345\205\203.gif" new file mode 100644 index 0000000000..f71d9813b5 Binary files /dev/null and "b/docs/images/readme/\344\272\214\346\254\241\345\205\203.gif" differ diff --git "a/docs/images/readme/\344\272\272\344\275\223\350\247\243\346\236\220.gif" "b/docs/images/readme/\344\272\272\344\275\223\350\247\243\346\236\220.gif" new file mode 100644 index 0000000000..f1b40598cf Binary files /dev/null and "b/docs/images/readme/\344\272\272\344\275\223\350\247\243\346\236\220.gif" differ diff --git "a/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262-0.gif" "b/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262-0.gif" new file mode 100644 index 0000000000..5a5570ce58 Binary files /dev/null and "b/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262-0.gif" differ diff --git "a/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262.gif" "b/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262.gif" new file mode 100644 index 0000000000..e6bc62625b Binary files /dev/null and "b/docs/images/readme/\344\272\272\345\203\217\345\210\206\345\211\262.gif" differ diff --git a/docs/images/seg_weichat.png b/docs/images/seg_weichat.png deleted file mode 100644 index 2319f1c6d3..0000000000 Binary files a/docs/images/seg_weichat.png and /dev/null differ diff --git a/docs/images/wechat_qq.png b/docs/images/wechat_qq.png deleted file mode 100644 index 1aee33d5b8..0000000000 Binary files a/docs/images/wechat_qq.png and /dev/null differ diff --git a/docs/images/wechat_qq_623.png b/docs/images/wechat_qq_623.png deleted file mode 100644 index 1c6e2ea3e9..0000000000 Binary files a/docs/images/wechat_qq_623.png and /dev/null differ diff --git a/docs/images/wechat_qq_630.png b/docs/images/wechat_qq_630.png deleted file mode 100644 index 3a97524d3e..0000000000 Binary files a/docs/images/wechat_qq_630.png and /dev/null differ diff --git a/docs/images/xingnengtu.png b/docs/images/xingnengtu.png new file mode 100644 index 0000000000..d02fef55cf Binary files /dev/null and b/docs/images/xingnengtu.png differ diff --git a/docs/install.md b/docs/install.md index a4bc430a65..b54432fb45 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,19 +1,19 @@ -# 安装文档 +English|[简体中文](install_cn.md) +# 
Instruction of Installation +## Environment Requirements -## 环境要求 +- PaddlePaddle 2.1 (Get API support) +- OS: 64-bit(Going to run 64-bit programs) +- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64-bit version +- pip/pip3(9.0.1+),64-bit version (Get environment support) +- CUDA >= 10.1 (NVIDIA GPU Parallel Computing Framework) +- cuDNN >= 7.6 (NVIDIA GPU acceleration library) -- PaddlePaddle 2.1 -- OS 64位操作系统 -- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64位版本 -- pip/pip3(9.0.1+),64位版本 -- CUDA >= 10.1 -- cuDNN >= 7.6 +## Instruction of Installation -## 安装说明 - -### 1. 安装PaddlePaddle +### 1. Install PaddlePaddle ``` # CUDA10.1 @@ -22,34 +22,39 @@ python -m pip install paddlepaddle-gpu==2.1.0.post101 -i https://paddlepaddle.or # CPU python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple ``` -- 更多CUDA版本或环境快速安装,请参考[PaddlePaddle快速安装文档](https://www.paddlepaddle.org.cn/install/quick) -- 更多安装方式例如conda或源码编译安装方法,请参考[PaddlePaddle安装文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/index_cn.html) +-For quick installation of more CUDA versions or environments, please refer to [PaddlePaddle Quick Installation Document](https://www.paddlepaddle.org.cn/install/quick) +-For more installation methods such as conda or source code compilation and installation methods, please refer to [PaddlePaddle Installation Document](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/index_cn.html) -请确保您的PaddlePaddle安装成功并且版本不低于需求版本。使用以下命令进行验证。 +Please make sure that your PaddlePaddle is installed successfully and the version is not lower than the required version. Use the following command to verify. ``` -# 在您的Python解释器中确认PaddlePaddle安装成功 +# Confirm that PaddlePaddle is installed successfully in your Python interpreter >>> import paddle >>> paddle.utils.run_check() -# 确认PaddlePaddle版本 +# Confirm PaddlePaddle version python -c "import paddle; print(paddle.__version__)" + +# If the following prompt appears on the command line, the PaddlePaddle installation is successful. +# PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. ``` -## 2.下载PaddleSeg代码 +## 2.Install PaddlePaddle Code ``` git clone https://github.com/PaddlePaddle/PaddleSeg ``` -## 3.安装PaddleSeg依赖 +## 3.Install PaddleSeg Requirements ``` cd PaddleSeg pip install -r requirements.txt + +#If a version error occurs during installation, you can try to delete the old version and re-run the script. ``` -## 4.确认环境安装成功 +## 4.Confirm Installation -执行下面命令,并在PaddleSeg/output文件夹中出现预测结果,则证明安装成功 +Execute the following command, and the predicted result appears in the PaddleSeg/output folder, it proves that the installation is successful. ```python python predict.py \ diff --git a/docs/install_cn.md b/docs/install_cn.md new file mode 100644 index 0000000000..65386d4b78 --- /dev/null +++ b/docs/install_cn.md @@ -0,0 +1,65 @@ +简体中文|[English](install.md) +# 安装文档 + + +## 环境要求 + +- PaddlePaddle 2.1 (获取底层API支持) +- OS 64位操作系统 (运行64位程序) +- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64位版本 +- pip/pip3(9.0.1+),64位版本 (提供环境支持) +- CUDA >= 10.1 (NVIDIA GPU 并行计算框架) +- cuDNN >= 7.6 (NVIDIA GPU 加速库) + +## 安装说明 + +### 1. 
安装PaddlePaddle + +``` +# CUDA10.1 +python -m pip install paddlepaddle-gpu==2.1.0.post101 -i https://paddlepaddle.org.cn/whl/mkl/stable.html + +# CPU +python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple +``` +- 更多CUDA版本或环境快速安装,请参考[PaddlePaddle快速安装文档](https://www.paddlepaddle.org.cn/install/quick) +- 更多安装方式例如conda或源码编译安装方法,请参考[PaddlePaddle安装文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/install/index_cn.html) + +请确保您的PaddlePaddle安装成功并且版本不低于需求版本。使用以下命令进行验证。 + +``` +# 在您的Python解释器中确认PaddlePaddle安装成功 +>>> import paddle +>>> paddle.utils.run_check() + +# 确认PaddlePaddle版本 +python -c "import paddle; print(paddle.__version__)" + +# 如果命令行出现以下提示,说明PaddlePaddle安装成功 +# PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now. +``` + + + +## 2.下载PaddleSeg代码 +``` +git clone https://github.com/PaddlePaddle/PaddleSeg +``` +## 3.安装PaddleSeg依赖 +``` +cd PaddleSeg +pip install -r requirements.txt + +#如果安装时出现版本错误,可以尝试删除旧版本,重新运行该脚本。 +``` +## 4.确认环境安装成功 + +执行下面命令,并在PaddleSeg/output文件夹中出现预测结果,则证明安装成功 + +```python +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path https://bj.bcebos.com/paddleseg/dygraph/optic_disc/bisenet_optic_disc_512x512_1k/model.pdparams\ + --image_path docs/images/optic_test_image.jpg \ + --save_dir output/result +``` diff --git a/docs/module/loss/BCELoss_cn.md b/docs/module/loss/BCELoss_cn.md new file mode 100644 index 0000000000..d8f8d91d1a --- /dev/null +++ b/docs/module/loss/BCELoss_cn.md @@ -0,0 +1,28 @@ +简体中文 | [English](BCELoss_en.md) +# [BCELoss](../../../paddleseg/models/losses/binary_cross_entropy_loss.py) + + +二元交叉熵适合处理二分类与多标签分类任务。二元交叉熵以标注图的概率模型为基准,用二分类语义分割模型计算KL散度,根据吉布斯不等式知二者的交叉熵大于语义分割概率模型的熵。计算BCELoss时,我们通常忽略语义分割概率模型的熵(因为它是常量),仅将KL散度的一部分作为损失函数。 + + +```python +class paddleseg.models.losses.BCELoss( + weight = None, + pos_weight = None, + ignore_index = 255, + edge_label = False +) +``` + +## BCELoss 使用指南 + + +### 参数 +* **weight** (Tensor | str, optional): 对每个批数据元素的损失手动地重新调整权重。如果设定该参数, +且若传入的是一个 1D 张量,则其尺寸为 `[N, ]`,其数据类型为 float32 或 float64; +若传入的是一个 str,则值必须指定为 'dynamic',以使在每轮迭代中根据二元交叉熵动态的计算权重。 + *默认:``'None'``* +* **pos_weight** (float|str, optional): 正样本的权重。若传入的是一个 str,则值必须指定为 'dynamic',以使在每轮迭代中动态的计算权重。 + *默认:``'None'``* +* **ignore_index** (int64, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* +* **edge_label** (bool, optional): 是否使用边缘标签。 *默认:``False``* \ No newline at end of file diff --git a/docs/module/loss/BCELoss_en.md b/docs/module/loss/BCELoss_en.md new file mode 100644 index 0000000000..646ae73ce4 --- /dev/null +++ b/docs/module/loss/BCELoss_en.md @@ -0,0 +1,31 @@ +English | [简体中文](BCELoss_cn.md) +# [BCELoss](../../../paddleseg/models/losses/binary_cross_entropy_loss.py) + + +Binary cross entropy is suitable for handling binary classification and multi-label classification tasks.The binary cross entropy is based on the probability model of the annotated map, and the binary semantic segmentation model is used to calculate the KL divergence. According to the Gibbs inequality, the cross entropy of the two is greater than the entropy of the semantic segmentation probability model. When calculating BCELoss, we usually ignore the entropy of the semantic segmentation probability model (because it is a constant), and only use a part of the KL divergence as the loss function. 
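+
+In its simplest form, the quantity being averaged is the per-pixel binary cross entropy between the predicted foreground probability and the binary label. The snippet below is only a minimal NumPy sketch of that core term (the helper name and sample values are illustrative); the actual BCELoss additionally supports `weight`, `pos_weight`, `ignore_index` and `edge_label` as described below:
+
+```python
+import numpy as np
+
+def binary_cross_entropy(prob, label, eps=1e-7):
+    # clip to avoid log(0), then average -[y*log(p) + (1-y)*log(1-p)] over all pixels
+    prob = np.clip(prob, eps, 1 - eps)
+    return -(label * np.log(prob) + (1 - label) * np.log(1 - prob)).mean()
+
+prob = np.array([0.9, 0.2, 0.7])   # predicted foreground probabilities
+label = np.array([1.0, 0.0, 1.0])  # binary ground-truth labels
+print(binary_cross_entropy(prob, label))  # ~0.228
+```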
+ + +```python +class paddleseg.models.losses.BCELoss( + weight = None, + pos_weight = None, + ignore_index = 255, + edge_label = False +) +``` + +## BCELoss usage guidance + + +### Args +* **weight** (Tensor | str, optional): A manual rescaling weight given to the loss of each + batch element. If given, it has to be a 1D Tensor whose size is `[N, ]`,the data type is float32, float64. + If type is str, it should equal to 'dynamic'. + It will compute weight dynamically in every step. + *Default:``'None'``* +* **pos_weight** (float|str, optional): A weight of positive examples. If type is str, + it should equal to 'dynamic'. It will compute weight dynamically in every step. + *Default:``'None'``* +* **ignore_index** (int64, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient. When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* +* **edge_label** (bool, optional): Whether to use edge label. *Default:``False``* \ No newline at end of file diff --git a/docs/module/loss/BootstrappedCrossEntropyLoss_cn.md b/docs/module/loss/BootstrappedCrossEntropyLoss_cn.md new file mode 100644 index 0000000000..0d36b30eb6 --- /dev/null +++ b/docs/module/loss/BootstrappedCrossEntropyLoss_cn.md @@ -0,0 +1,20 @@ +简体中文 | [English](BootstrappedCrossEntropyLoss_en.md) +## [BootstrappedCrossEntropyLoss](../../../paddleseg/models/losses/bootstrapped_cross_entropy.py) +Bootstrapped 首先利用样本构造初始的分类起,然后对未标记样本进行迭代分类,进而利用扩展后的训练数据为未标记样本提取新的 seed rules。 +[参考文献](https://arxiv.org/pdf/1412.6596.pdf) +```python +class paddleseg.models.losses.BootstrappedCrossEntropyLoss( + min_K, + loss_th, + weight = None, + ignore_index = 255 +) +``` + +## Bootstrapped cross entropy loss 使用指南 + +### 参数 +* **min_K** (int): 在计算损失时,参与计算的最小像素数。 +* **loss_th** (float): 损失阈值。 只计算大于阈值的损失。 +* **weight** (tuple|list, optional): 不同类的权重。 *默认:``None``* +* **ignore_index** (int, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/BootstrappedCrossEntropyLoss_en.md b/docs/module/loss/BootstrappedCrossEntropyLoss_en.md new file mode 100644 index 0000000000..59085f2393 --- /dev/null +++ b/docs/module/loss/BootstrappedCrossEntropyLoss_en.md @@ -0,0 +1,24 @@ +English | [简体中文](BootstrappedCrossEntropyLoss_cn.md) +## [BootstrappedCrossEntropyLoss](../../../paddleseg/models/losses/bootstrapped_cross_entropy.py) + +Bootstrapped first uses samples to construct an initial classification, and then iteratively classifies unlabeled samples, and then uses the expanded training data to extract new seed rules for unlabeled samples. + +[paper](https://arxiv.org/pdf/1412.6596.pdf) +```python +class paddleseg.models.losses.BootstrappedCrossEntropyLoss( + min_K, + loss_th, + weight = None, + ignore_index = 255 +) +``` + +## Bootstrapped cross entropy loss usage guidance + +### Args +* **min_K** (int): the minimum number of pixels to be counted in loss computation. +* **loss_th** (float): The loss threshold. Only loss that is larger than the threshold + would be calculated. +* **weight** (tuple|list, optional): The weight for different classes. 
*Default:``None``* +* **ignore_index** (int, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* diff --git a/docs/module/loss/CrossEntropyLoss_cn.md b/docs/module/loss/CrossEntropyLoss_cn.md new file mode 100644 index 0000000000..907ff4e286 --- /dev/null +++ b/docs/module/loss/CrossEntropyLoss_cn.md @@ -0,0 +1,21 @@ +简体中文 | [English](CrossEntropyLoss_en.md) +## [CrossEntropyLoss](../../../paddleseg/models/losses/cross_entropy_loss.py) + + +交叉熵 (CE) 方法由于其简单性和有效性,允许调整不同类别像素的权重,成为了一种最流行的损失函数。在很多语义分割任务中,交叉熵依赖于足够多的目标函数调用来准确估计基础分布的最佳参数。 +CrossEntropyLoss常用于多像素类别的分割任务,其描述的是两个概率分布之间的不同,可以用来刻画当前模型与实际模型之间的差距(在训练过程中,我们暂时认为给出的标注集就是真实世界中的模型)。注意:机器学习算法中的逻辑回归是这种交叉熵的特例。 +```python +class paddleseg.models.losses.CrossEntropyLoss( + weight = None, + ignore_index = 255, + top_k_percent_pixels = 1.0 +) +``` + +## Cross entropy loss 使用指南 + +### 参数 +* **weight** (tuple|list|ndarray|Tensor, optional): 为每个像素类别的损失手动调整权重。它的长度必须等同于像素类别数。可在多类样本不均衡等情况下调整各类的权重。 + *默认 ``None``* +* **ignore_index** (int64, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* +* **top_k_percent_pixels** (float, optional): 该值的取值范围为 [0.0, 1.0]。 当该值 < 1.0 时,仅计算前 k% 像素(例如,前 20% 像素)的损失。 这将有助于对难分像素的挖掘。 \ No newline at end of file diff --git a/docs/module/loss/CrossEntropyLoss_en.md b/docs/module/loss/CrossEntropyLoss_en.md new file mode 100644 index 0000000000..d3a1945152 --- /dev/null +++ b/docs/module/loss/CrossEntropyLoss_en.md @@ -0,0 +1,23 @@ +English | [简体中文](CrossEntropyLoss_cn.md) +## [CrossEntropyLoss](../../../paddleseg/models/losses/cross_entropy_loss.py) + +The cross entropy (CE) method has become one of the most popular loss functions due to its simplicity and effectiveness, allowing adjustment of the weights of different categories of pixels. In many semantic segmentation tasks, cross entropy relies on enough objective function calls to accurately estimate the best parameters of the underlying distribution. +CrossEntropyLoss is often used for multi-pixel segmentation tasks. It describes the difference between two probability distributions. It can be used to describe the gap between the current model and the actual model (during the training process, we temporarily think that the given label set It is the model in the real world). Note: Logistic regression in machine learning algorithms is a special case of this kind of cross-entropy. +```python +class paddleseg.models.losses.CrossEntropyLoss( + weight = None, + ignore_index = 255, + top_k_percent_pixels = 1.0 +) +``` + +## Cross entropy loss usage guidance + +### Args +* **weight** (tuple|list|ndarray|Tensor, optional): A manual rescaling weight + given to each class. Its length must be equal to the number of classes.The weights of various types can be adjusted under conditions such as unbalanced samples of multiple types. + *Default ``None``* +* **ignore_index** (int64, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. 
When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* +* **top_k_percent_pixels** (float, optional): The value lies in [0.0, 1.0]. When its value < 1.0, only compute the loss for + the top k percent pixels (e.g., the top 20% pixels). This is useful for hard pixel mining. \ No newline at end of file diff --git a/docs/module/loss/DiceLoss_cn.md b/docs/module/loss/DiceLoss_cn.md new file mode 100644 index 0000000000..e93726f4de --- /dev/null +++ b/docs/module/loss/DiceLoss_cn.md @@ -0,0 +1,16 @@ +简体中文 | [English](DiceLoss_en.md) +## [DiceLoss](../../../paddleseg/models/losses/dice_loss.py) +Dice Loss 是一种广泛的应用于医学影像分割任务中的损失函数。Dice 系数是一种用于度量集合之间的相似程度的函数,在语义分割任务中,我们可以理解为当前的模型与真实世界中的真实模型之间的相似程度。Dice损失函数的计算过程包括用预测分割图与GT分割图之间进行点乘、对点乘结果的每个位置进行累计求和,最后计算 1-Dice 的值作为损失函数的输出,即 Dice = 1-2(|X∩Y|/|X|+|Y|)。你可以使用拉普拉斯平滑系数,将分子分母添加该系数后,可以避免除0异常,同时减少过拟合。即Dice_smooth = 1-2((|X∩Y|+smooth) / (|X|+|Y|+smooth) ) +```python。 + +class paddleseg.models.losses.DiceLoss( + ignore_index = 255, + smooth = 0. +) +``` + +## Dice loss 使用指南 + +### 参数 +* **ignore_index** (int64, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* +* **smooth** (float, optional): 可以添加该smooth参数以防止出现除0异常。你也可以设置更大的平滑值(拉普拉斯平滑)以避免过拟合。*默认:`` 0``* \ No newline at end of file diff --git a/docs/module/loss/DiceLoss_en.md b/docs/module/loss/DiceLoss_en.md new file mode 100644 index 0000000000..bbd0b47451 --- /dev/null +++ b/docs/module/loss/DiceLoss_en.md @@ -0,0 +1,19 @@ +English | [简体中文](DiceLoss_cn.md) +## [DiceLoss](../../../paddleseg/models/losses/dice_loss.py) + +Dice Loss is a loss function widely used in medical image segmentation tasks. The Dice coefficient is a function used to measure the degree of similarity between sets. In the semantic segmentation task, we can understand the degree of similarity between the current model and the real model in the real world. The calculation process of the Dice loss function includes the dot multiplication between the predicted segmentation map and the GT segmentation map, the cumulative sum of each position of the dot multiplication result, and finally the calculation of the value of 1-Dice as the output of the loss function, that is, Dice = 1-2(|X∩Y|/|X|+|Y|). You can use the Laplacian smoothing coefficient. After adding the coefficient to the numerator and denominator, you can avoid the division by 0 exception and reduce overfitting. That is, Dice_smooth = 1-2((|X∩Y|+smooth) / (|X|+|Y|+smooth)) + +```python。 + +class paddleseg.models.losses.DiceLoss( + ignore_index = 255, + smooth = 0. +) +``` + +## Dice loss usage guidance + +### Args +* **ignore_index** (int64, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* +* **smooth** (float, optional): The smooth parameter can be added to prevent the division by 0 exception. 
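+
+As a concrete illustration, the snippet below is a minimal NumPy sketch of the binary soft-Dice loss in the formulation given above, using the default `smooth = 0` (the helper name and sample values are illustrative); the actual DiceLoss works on segmentation label maps and supports `ignore_index` as described below:
+
+```python
+import numpy as np
+
+def dice_loss(prob, label, smooth=0.0):
+    # Dice = 2*|X∩Y| / (|X| + |Y|); the loss is 1 - Dice, with optional smoothing
+    intersection = (prob * label).sum()
+    return 1 - 2 * (intersection + smooth) / (prob.sum() + label.sum() + smooth)
+
+prob = np.array([0.9, 0.8, 0.1, 0.2])   # predicted foreground probabilities
+label = np.array([1.0, 1.0, 0.0, 0.0])  # binary ground truth
+print(dice_loss(prob, label))           # 0.15 with the default smooth = 0
+```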
You can also set a larger smoothing value (Laplacian smoothing) to avoid overfitting.*Default:``0``* \ No newline at end of file diff --git a/docs/module/loss/DualTaskLoss_cn.md b/docs/module/loss/DualTaskLoss_cn.md new file mode 100644 index 0000000000..5b106020fe --- /dev/null +++ b/docs/module/loss/DualTaskLoss_cn.md @@ -0,0 +1,16 @@ +简体中文 | [English](DualTaskLoss_en.md) +## [DualTaskLoss](../../../paddleseg/models/losses/gscnn_dual_task_loss.py) +用于为半监督学习的 Dual-task 一致性以对模型进行约束。DualTaskLoss 旨在强化多个任务之间的一致性。 + +```python +class paddleseg.models.losses.DualTaskLoss( + ignore_index = 255, + tau = 0.5 +) +``` + +## Dual task loss 使用指南 + +### 参数 +* **ignore_index** (int64): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* +* **tau** (float): Gumbel softmax 样本的tau。 \ No newline at end of file diff --git a/docs/module/loss/DualTaskLoss_en.md b/docs/module/loss/DualTaskLoss_en.md new file mode 100644 index 0000000000..63ee7d4c38 --- /dev/null +++ b/docs/module/loss/DualTaskLoss_en.md @@ -0,0 +1,17 @@ +English | [简体中文](DualTaskLoss_cn.md) +## [DualTaskLoss](../../../paddleseg/models/losses/gscnn_dual_task_loss.py) +Dual-task consistency used for semi-supervised learning to constrain the model. DualTaskLoss aims to strengthen the consistency between multiple tasks. + +```python +class paddleseg.models.losses.DualTaskLoss( + ignore_index = 255, + tau = 0.5 +) +``` + +## Dual task loss usage guidance + +### Args +* **ignore_index** (int64): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* +* **tau** (float): the tau of gumbel softmax sample. \ No newline at end of file diff --git a/docs/module/loss/EdgeAttentionLoss_cn.md b/docs/module/loss/EdgeAttentionLoss_cn.md new file mode 100644 index 0000000000..c6a9a0120a --- /dev/null +++ b/docs/module/loss/EdgeAttentionLoss_cn.md @@ -0,0 +1,16 @@ +简体中文 | [English](EdgeAttentionLoss_en.md) +## [EdgeAttentionLoss](../../../paddleseg/models/losses/edge_attention_loss.py) +适合以 encoder 提取edge,以 decoder 进行加权聚合的多任务训练场景。是一种融合边缘检测与注意力机制进行多 loss 的组合输出的方法。 + +```python +class paddleseg.models.losses.EdgeAttentionLoss( + edge_threshold = 0.8, + ignore_index = 255 +) +``` + +## Edge attention loss 使用指南 + +### 参数 +* **edge_threshold** (float): 值大于 edge_threshold 的像素被视为边缘。 +* **ignore_index** (int64): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/EdgeAttentionLoss_en.md b/docs/module/loss/EdgeAttentionLoss_en.md new file mode 100644 index 0000000000..e28a13052c --- /dev/null +++ b/docs/module/loss/EdgeAttentionLoss_en.md @@ -0,0 +1,17 @@ +English | [简体中文](EdgeAttentionLoss_cn.md) +## [EdgeAttentionLoss](../../../paddleseg/models/losses/edge_attention_loss.py) +It is suitable for multi-task training scenarios where the encoder extracts the edge and the decoder performs weighted aggregation. It is a method of combining edge detection and attention mechanism for multi-loss combined output. 
+ +```python +class paddleseg.models.losses.EdgeAttentionLoss( + edge_threshold = 0.8, + ignore_index = 255 +) +``` + +## Edge attention loss usage guidance + +### Args +* **edge_threshold** (float): The pixels whose values are greater than edge_threshold are treated as edges. +* **ignore_index** (int64): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/L1Loss_cn.md b/docs/module/loss/L1Loss_cn.md new file mode 100644 index 0000000000..f1c761e18a --- /dev/null +++ b/docs/module/loss/L1Loss_cn.md @@ -0,0 +1,21 @@ +简体中文 | [English](L1Loss_en.md) +## [L1Loss](../../../paddleseg/models/losses/l1_loss.py) +L1范数损失函数用于计算最小绝对值偏差。该损失旨在将估计值与真实值之间的绝对差值的总和最小化。可以选择配合使用 reduction 策略对该 loss 的直接计算结果进行一定的处理。 + +```python +class paddleseg.models.losses.L1Loss( + reduction = 'mean', + ignore_index = 255 +) +``` + +## L1 loss 使用指南 + +### 参数 +* **reduction** (str, optional): 指示应用于损失值的 reduction 方式,可以指定为 ``'none'``、``'mean'`` 或 ``'sum'``。 + + > - 如果 `reduction` 为 ``'none'``, 不对损失值做任何处理直接返回; + > - 如果 `reduction` 为 ``'mean'``, 返回经 Mean 处理后的损失; + > - 如果 `reduction` 为 ``'sum'``, 返回经 Sum 处理后的损失。 + > - *默认:``'mean'``* +* **ignore_index** (int, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/L1Loss_en.md b/docs/module/loss/L1Loss_en.md new file mode 100644 index 0000000000..e61d0d98e0 --- /dev/null +++ b/docs/module/loss/L1Loss_en.md @@ -0,0 +1,23 @@ +English | [简体中文](L1Loss_cn.md) +## [L1Loss](../../../paddleseg/models/losses/l1_loss.py) +The L1-norm loss function is used to calculate the minimum absolute value deviation. This loss aims to minimize the sum of absolute differences between the estimated value and the true value. You can choose to use the reduction strategy to perform certain processing on the direct calculation result of the loss. + +```python +class paddleseg.models.losses.L1Loss( + reduction = 'mean', + ignore_index = 255 +) +``` + +## L1 loss usage guidance + +### Args +* **reduction** (str, optional): Indicate the reduction to apply to the loss, + the candidates are ``'none'`` | ``'mean'`` | ``'sum'``. + + > - If `reduction` is ``'none'``, the unreduced loss is returned. + > - If `reduction` is ``'mean'``, the reduced mean loss is returned. + > - If `reduction` is ``'sum'``, the reduced sum loss is returned. + > - *Default:``'mean'``* +* **ignore_index** (int, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function.
*Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/LovaszHingeLoss_cn.md b/docs/module/loss/LovaszHingeLoss_cn.md new file mode 100644 index 0000000000..6e5f90d62e --- /dev/null +++ b/docs/module/loss/LovaszHingeLoss_cn.md @@ -0,0 +1,12 @@ +简体中文 | [English](LovaszHingeLoss_en.md) +## [LovaszHingeLoss](../../../paddleseg/models/lovasz_loss.py) +Hinge Loss是在不连续、不平滑的简单阶梯损失函数上改进的一种损失函数。对于正样本,模型的输出应大于等于1;对于负样本,模型的输出应小于等于-1。 + +```python +class paddleseg.models.losses.LovaszHingeLoss(ignore_index = 255) +``` + +## Binary Lovasz hinge loss使用指南 + +### 参数 +* **ignore_index** (int64): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/LovaszHingeLoss_en.md b/docs/module/loss/LovaszHingeLoss_en.md new file mode 100644 index 0000000000..39ee6066f0 --- /dev/null +++ b/docs/module/loss/LovaszHingeLoss_en.md @@ -0,0 +1,14 @@ +English | [简体中文](LovaszHingeLoss_cn.md) +## [LovaszHingeLoss](../../../paddleseg/models/lovasz_loss.py) + +Hinge Loss is a loss function that improves on the simple step loss function, which is neither continuous nor smooth. For positive samples, the model output should be greater than or equal to 1; for negative samples, the model output should be less than or equal to -1. + +```python +class paddleseg.models.losses.LovaszHingeLoss(ignore_index = 255) +``` + +## Lovasz hinge loss usage guidance + +### Args +* **ignore_index** (int64): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/LovaszSoftmaxLoss_cn.md b/docs/module/loss/LovaszSoftmaxLoss_cn.md new file mode 100644 index 0000000000..cd9cdd1eae --- /dev/null +++ b/docs/module/loss/LovaszSoftmaxLoss_cn.md @@ -0,0 +1,18 @@ +简体中文 | [English](LovaszSoftmaxLoss_en.md) +## [LovaszSoftmaxLoss](../../../paddleseg/models/lovasz_loss.py) + +lovasz softmax loss适用于多分类问题。该工作发表在CVPR 2018上。 +[参考文献](https://openaccess.thecvf.com/content_cvpr_2018/html/Berman_The_LovaSz-Softmax_Loss_CVPR_2018_paper.html) + +```python +class paddleseg.models.losses.LovaszSoftmaxLoss( + ignore_index = 255, + classes = 'present' +) +``` + +## Lovasz-Softmax loss使用指南 + +### 参数 +* **ignore_index** (int64): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* +* **classes** (str|list): 'all' 表示所有,'present' 表示标签中存在的类,或者要做 average 的类列表。 \ No newline at end of file diff --git a/docs/module/loss/LovaszSoftmaxLoss_en.md b/docs/module/loss/LovaszSoftmaxLoss_en.md new file mode 100644 index 0000000000..98ffede88c --- /dev/null +++ b/docs/module/loss/LovaszSoftmaxLoss_en.md @@ -0,0 +1,19 @@ +English | [简体中文](LovaszSoftmaxLoss_cn.md) +## [LovaszSoftmaxLoss](../../../paddleseg/models/lovasz_loss.py) + +Lovasz softmax loss is suitable for multi-class problems. The work was published at CVPR 2018.
+[paper](https://openaccess.thecvf.com/content_cvpr_2018/html/Berman_The_LovaSz-Softmax_Loss_CVPR_2018_paper.html) + +```python +class paddleseg.models.losses.LovaszSoftmaxLoss( + ignore_index = 255, + classes = 'present' +) +``` + +## Lovasz-Softmax loss usage guidance + +### Args +* **ignore_index** (int64): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* +* **classes** (str|list): 'all' for all, 'present' for classes present in labels, or a list of classes to average. \ No newline at end of file diff --git a/docs/module/loss/MSELoss_cn.md b/docs/module/loss/MSELoss_cn.md new file mode 100644 index 0000000000..36eeed4da8 --- /dev/null +++ b/docs/module/loss/MSELoss_cn.md @@ -0,0 +1,21 @@ +简体中文 | [English](MSELoss_en.md) +## [MSELoss](../../../paddleseg/models/mean_square_error_loss.py) +Mean square error loss 即均方误差损失。均方误差指模型预测值与样本真实值之差的平方的平均值。 + +```python +class paddleseg.models.losses.MSELoss( + reduction = 'mean', + ignore_index = 255 +) +``` + +## Mean square error loss使用指南 + +### 参数 +* **reduction** (string, optional): 对输出结果的 reduction 方式,可以指定为 ``'none'``、``'mean'`` 或 ``'sum'``。 + + > - 如果 `reduction` 为 ``'none'``, 不对损失值做任何处理直接返回; + > - 如果 `reduction` 为 ``'mean'``, 返回经 Mean 处理后的损失; + > - 如果 `reduction` 为 ``'sum'``, 返回经 Sum 处理后的损失。 + > - *默认:``'mean'``* +* **ignore_index** (int, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/MSELoss_en.md b/docs/module/loss/MSELoss_en.md new file mode 100644 index 0000000000..5339e1d771 --- /dev/null +++ b/docs/module/loss/MSELoss_en.md @@ -0,0 +1,24 @@ +English | [简体中文](MSELoss_cn.md) +## [MSELoss](../../../paddleseg/models/mean_square_error_loss.py) +The mean square error is the average of the squared difference between the model's predicted value and the sample's true value. + +```python +class paddleseg.models.losses.MSELoss( + reduction = 'mean', + ignore_index = 255 +) +``` + +## Mean square error loss usage guidance + +### Args +* **reduction** (string, optional): The reduction method for the output, + could be 'none' | 'mean' | 'sum'. + + > - If `reduction` is ``'none'``, the unreduced loss is returned. + > - If `reduction` is ``'mean'``, the reduced mean loss is returned. + > - If `reduction` is ``'sum'``, the reduced sum loss is returned. + > - *Default:``'mean'``* + +* **ignore_index** (int, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function.
*Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/MixedLoss_cn.md b/docs/module/loss/MixedLoss_cn.md new file mode 100644 index 0000000000..af5808b121 --- /dev/null +++ b/docs/module/loss/MixedLoss_cn.md @@ -0,0 +1,18 @@ +简体中文 | [English](MixedLoss_en.md) +## [MixedLoss](../../../paddleseg/models/mixed_loss.py) + +实现混合loss训练。PaddleSeg每一种损失函数对应网络的一个logit 输出,如果要某个网络输出应用多种损失函数需要修改网络代码。MixedLoss 将允许网络对多个损失函数结果进行加权计算,只需以模块化的形式装入,就可以实现混合loss训练。 + +```python +class paddleseg.models.losses.MixedLoss(losses, coef) +``` + + +## Mixed loss使用指南 + +### 参数 +* **losses** (list of nn.Layer): 由多个损失函数类所组成的列表。 +* **coef** (float|int): 每个损失函数类的权重比。 + +### 返回值 +* MixedLoss 类的可调用对象。 \ No newline at end of file diff --git a/docs/module/loss/MixedLoss_en.md b/docs/module/loss/MixedLoss_en.md new file mode 100644 index 0000000000..bfb89999b3 --- /dev/null +++ b/docs/module/loss/MixedLoss_en.md @@ -0,0 +1,18 @@ +English | [简体中文](MixedLoss_cn.md) +## [MixedLoss](../../../paddleseg/models/mixed_loss.py) + +Realize mixed loss training. Each loss function of PaddleSeg corresponds to a logit output of the network. If you want to apply multiple loss functions to a certain network output, you need to modify the network code. MixedLoss will allow the network to weight the results of multiple loss functions, and only need to be loaded in a modular form to achieve mixed loss training. + +```python +class paddleseg.models.losses.MixedLoss(losses, coef) +``` + + +## Mixed loss usage guidance + +### Args +* **losses** (list of nn.Layer): A list consisting of multiple loss classes +* **coef** (float|int): Weighting coefficient of multiple loss + +### Returns +* A callable object of MixedLoss. \ No newline at end of file diff --git a/docs/module/loss/OhemCrossEntropyLoss_cn.md b/docs/module/loss/OhemCrossEntropyLoss_cn.md new file mode 100644 index 0000000000..1a3b2c1e94 --- /dev/null +++ b/docs/module/loss/OhemCrossEntropyLoss_cn.md @@ -0,0 +1,18 @@ +简体中文 | [English](OhemCrossEntropyLoss_en.md) +## [OhemCrossEntropyLoss](../../../paddleseg/models/ohem_cross_entropy_loss.py) +OHEM旨在解决处理困难样本的问题。在一些语义分割问题中,经常出现像素点难以标注或无法标注的情况,或是类别不平衡的情况,都将对模型性能产生严重的制约。OHEM算法将根据输入到模型中的样本的损失来区分出困难样本,这些困难样本分类精度差,会产生较大的损失。 + +```python +class paddleseg.models.losses.OhemCrossEntropyLoss( + thresh = 0.7, + min_kept = 10000, + ignore_index = 255 +) +``` + +## Ohem cross entropy loss使用指南 + +### 参数 +* **thresh** (float, optional): ohem的阈值。 *默认:``0.7``* +* **min_kept** (int, optional): 指定最小保持用于计算损失函数的像素数。``min_kept`` 与 ``thresh`` 配合使用:如果 ``thresh`` 设置过高,可能导致本轮迭代中没有对损失函数的输入值,因此设定该值可以保证至少前``min_kept``个元素不会被过滤掉。*默认:``10000``* +* **ignore_index** (int64, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/OhemCrossEntropyLoss_en.md b/docs/module/loss/OhemCrossEntropyLoss_en.md new file mode 100644 index 0000000000..61e0c71e92 --- /dev/null +++ b/docs/module/loss/OhemCrossEntropyLoss_en.md @@ -0,0 +1,20 @@ +English | [简体中文](OhemCrossEntropyLoss_cn.md) +## [OhemCrossEntropyLoss](../../../paddleseg/models/ohem_cross_entropy_loss.py) + +OHEM aims to handling difficult samples. In some cases, there are unbalanced classes, and labelling all pixels is difficult or even impossible, which will severely restrict the performance of the model. The OHEM algorithm will distinguish difficult samples based on the loss of the samples input to the model. 
These difficult samples have poor classification accuracy and will produce greater losses. + +```python +class paddleseg.models.losses.OhemCrossEntropyLoss( + thresh = 0.7, + min_kept = 10000, + ignore_index = 255 +) +``` + +## Ohem cross entropy loss usage guidance + +### Args +* **thresh** (float, optional): The threshold of ohem. *Default:``0.7``* +* **min_kept** (int, optional): Specify the minimum number of pixels to keep for calculating the loss function.``min_kept`` is used in conjunction with ``thresh``: If ``thresh`` is set too high, it may result in no input value to the loss function in this round of iteration, so setting this value can ensure that at least the top ``min_kept`` elements will not be filtered out. *Default:``10000``* +* **ignore_index** (int64, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/OhemEdgeAttentionLoss_cn.md b/docs/module/loss/OhemEdgeAttentionLoss_cn.md new file mode 100644 index 0000000000..ca73891dc7 --- /dev/null +++ b/docs/module/loss/OhemEdgeAttentionLoss_cn.md @@ -0,0 +1,19 @@ +简体中文 | [English](OhemEdgeAttentionLoss_en.md) +## [OhemEdgeAttentionLoss](../../../paddleseg/models/ohem_edge_attention_loss.py) +OHEM算法将根据输入到模型中的样本的损失来区分出困难样本,这些困难样本分类精度差,会产生较大的损失。在存在困难样本的场景下,如欲提高提取边缘的性能,可以使用该损失函数。 +```python +class paddleseg.models.losses.OhemEdgeAttentionLoss( + edge_threshold = 0.8, + thresh = 0.7, + min_kept = 5000, + ignore_index = 255 +) +``` + +## Ohem edge attention loss使用指南 + +### 参数 +* **edge_threshold** (float, optional): 值大于 edge_threshold 的像素被视为边缘。 *默认:``0.8``* +* **thresh** (float, optional): ohem的阈值。 *默认:`` 0.7``* +* **min_kept** (int, optional): 指定最小保持用于计算损失函数的像素数。``min_kept`` 与 ``thresh`` 配合使用:如果 ``thresh`` 设置过高,可能导致本轮迭代中没有对损失函数的输入值,因此设定该值可以保证至少前``min_kept``个元素不会被过滤掉。*默认:``5000``* +* **ignore_index** (int64, optional): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/OhemEdgeAttentionLoss_en.md b/docs/module/loss/OhemEdgeAttentionLoss_en.md new file mode 100644 index 0000000000..ad781c21d0 --- /dev/null +++ b/docs/module/loss/OhemEdgeAttentionLoss_en.md @@ -0,0 +1,20 @@ +English | [简体中文](OhemEdgeAttentionLoss_cn.md) +## [OhemEdgeAttentionLoss](../../../paddleseg/models/ohem_edge_attention_loss.py) +The OHEM algorithm will distinguish difficult samples based on the loss of the samples input to the model. These difficult samples have poor classification accuracy and will produce greater losses. In the above cases, if you want to improve the performance of edge extraction, you can use this loss function. +```python +class paddleseg.models.losses.OhemEdgeAttentionLoss( + edge_threshold = 0.8, + thresh = 0.7, + min_kept = 5000, + ignore_index = 255 +) +``` + +## Ohem edge attention loss usage guidance + +### Args +* **edge_threshold** (float, optional): The pixels greater edge_threshold as edges. *Default:`` 0.8``* +* **thresh** (float, optional): The threshold of ohem. 
*Default:`` 0.7``* +* **min_kept** (int, optional): Specify the minimum number of pixels to keep for calculating the loss function.``min_kept`` is used in conjunction with ``thresh``: If ``thresh`` is set too high, it may result in no input value to the loss function in this round of iteration, so setting this value can ensure that at least the top ``min_kept`` elements will not be filtered out. *Default:``5000``* +* **ignore_index** (int64, optional): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/RelaxBoundaryLoss_cn.md b/docs/module/loss/RelaxBoundaryLoss_cn.md new file mode 100644 index 0000000000..d41bee0cb8 --- /dev/null +++ b/docs/module/loss/RelaxBoundaryLoss_cn.md @@ -0,0 +1,21 @@ +简体中文 | [English](RelaxBoundaryLoss_en.md) +## [RelaxBoundaryLoss](../../../paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py) + +Relax boundary loss 由多个部分组成:主体特征的损失、边缘特征的损失、交叉熵损失。RelaxBoundaryLoss是为 DecoupleSegNet 设计的损失函数,该模型为边界像素点可能所属的类别(≥2)作出预测,该损失目的是在边界松弛约束下,使单个像素各分类的概率之和最大化。 + +```python +class paddleseg.models.losses.RelaxBoundaryLoss( + border = 1, + calculate_weights = False, + upper_bound = 1.0, + ignore_index = 255 +) +``` + +## Relax boundary loss使用指南 + +### 参数 +* **border** (int, optional): 边界的松弛值。*默认:``1``* +* **calculate_weights** (bool, optional): 是否计算所有类别的权重。 *默认:``False``* +* **upper_bound** (float, optional): 如果为所有类别计算权重,则指定权重的上限值。 *默认:``1.0``* +* **ignore_index** (int64): 指定一个在标注图中要忽略的像素值,其对输入梯度不产生贡献。当标注图中存在无法标注(或很难标注)的像素时,可以将其标注为某特定灰度值。在计算损失值时,其与原图像对应位置的像素将不作为损失函数的自变量。 *默认:``255``* \ No newline at end of file diff --git a/docs/module/loss/RelaxBoundaryLoss_en.md b/docs/module/loss/RelaxBoundaryLoss_en.md new file mode 100644 index 0000000000..3062cef335 --- /dev/null +++ b/docs/module/loss/RelaxBoundaryLoss_en.md @@ -0,0 +1,23 @@ +English | [简体中文](RelaxBoundaryLoss_cn.md) +## [RelaxBoundaryLoss](../../../paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py) + +Relax boundary loss is composed of multiple parts: the loss of main features, the loss of edge features, and the cross-entropy loss. RelaxBoundaryLoss is a loss function designed for DecoupleSegNet. This model makes predictions for the categories (≥2) that boundary pixels may belong to. This loss is to maximize the sum of the probabilities of each category of a single pixel under the constraint of boundary relaxation. + +```python + +class paddleseg.models.losses.RelaxBoundaryLoss( + border = 1, + calculate_weights = False, + upper_bound = 1.0, + ignore_index = 255 +) +``` + +## Relax boundary loss usage guidance + +### Args +* **border** (int, optional): The value of border to relax. *Default:`` 1``* +* **calculate_weights** (bool, optional): Whether to calculate weights for every classes. *Default:``False``* +* **upper_bound** (float, optional): The upper bound of weights if calculating weights for every classes. 
*Default:``1.0``* +* **ignore_index** (int64): Specify a pixel value to be ignored in the annotated image + and does not contribute to the input gradient.When there are pixels that cannot be marked (or difficult to be marked) in the marked image, they can be marked as a specific gray value. When calculating the loss value, the pixel corresponding to the original image will not be used as the independent variable of the loss function. *Default:``255``* \ No newline at end of file diff --git a/docs/module/loss/losses_cn.md b/docs/module/loss/losses_cn.md new file mode 100644 index 0000000000..518b04cff3 --- /dev/null +++ b/docs/module/loss/losses_cn.md @@ -0,0 +1,26 @@ +简体中文 | [English](losses_en.md) +# 损失函数的详细说明 + +* ## [paddleseg.models.losses.binary_cross_entropy_loss](./BCELoss_cn.md) + +* ## [paddleseg.models.losses.bootstrapped_cross_entropy](./BootstrappedCrossEntropyLoss_cn.md) + +* ## [paddleseg.models.losses.cross_entropy_loss](./CrossEntropyLoss_cn.md) + +* ## [paddleseg.models.losses.decoupledsegnet_relax_boundary_loss](./RelaxBoundaryLoss_cn.md) + +* ## [paddleseg.models.losses.dice_loss](./DiceLoss_cn.md) + +* ## [paddleseg.models.losses.DualTaskLoss](./DualTaskLoss_cn.md) + +* ## [paddleseg.models.losses.edge_attention_loss](./EdgeAttentionLoss_cn.md) + +* ## [paddleseg.models.losses.l1_loss](./L1Loss_cn.md) + +* ## [paddleseg.models.losses.Lovasz_loss](./lovasz_loss_cn.md) + +* ## [paddleseg.models.losses.MSELoss](./MSELoss_cn.md) + +* ## [paddleseg.models.losses.ohem_cross_entropy_loss](./OhemCrossEntropyLoss_cn.md) + +* ## [paddleseg.models.losses.ohem_edge_attention_loss](./OhemEdgeAttentionLoss_cn.md) \ No newline at end of file diff --git a/docs/module/loss/losses_en.md b/docs/module/loss/losses_en.md new file mode 100644 index 0000000000..4ba11085e9 --- /dev/null +++ b/docs/module/loss/losses_en.md @@ -0,0 +1,26 @@ +English | [简体中文](losses_cn.md) +# The Detailed description of the loss function + +* ## [paddleseg.models.losses.binary_cross_entropy_loss](./BCELoss_en.md) + +* ## [paddleseg.models.losses.bootstrapped_cross_entropy](./BootstrappedCrossEntropyLoss_en.md) + +* ## [paddleseg.models.losses.cross_entropy_loss](./CrossEntropyLoss_en.md) + +* ## [paddleseg.models.losses.decoupledsegnet_relax_boundary_loss](./RelaxBoundaryLoss_en.md) + +* ## [paddleseg.models.losses.dice_loss](./DiceLoss_en.md) + +* ## [paddleseg.models.losses.DualTaskLoss](./DualTaskLoss_en.md) + +* ## [paddleseg.models.losses.edge_attention_loss](./EdgeAttentionLoss_en.md) + +* ## [paddleseg.models.losses.l1_loss](./L1Loss_en.md) + +* ## [paddleseg.models.losses.Lovasz_loss](./lovasz_loss_en.md) + +* ## [paddleseg.models.losses.MSELoss](./MSELoss_en.md) + +* ## [paddleseg.models.losses.ohem_cross_entropy_loss](./OhemCrossEntropyLoss_en.md) + +* ## [paddleseg.models.losses.ohem_edge_attention_loss](./OhemEdgeAttentionLoss_en.md) \ No newline at end of file diff --git a/docs/module/loss/lovasz_loss.md b/docs/module/loss/lovasz_loss_cn.md similarity index 96% rename from docs/module/loss/lovasz_loss.md rename to docs/module/loss/lovasz_loss_cn.md index 5ec790bdfe..b29b34ae5c 100644 --- a/docs/module/loss/lovasz_loss.md +++ b/docs/module/loss/lovasz_loss_cn.md @@ -2,7 +2,7 @@ 在图像分割任务中,经常出现类别分布不均匀的情况,例如:工业产品的瑕疵检测、道路提取及病变区域提取等。我们可使用lovasz loss解决这个问题。 -Lovasz loss基于子模损失(submodular losses)的凸Lovasz扩展,对神经网络的mean IoU损失进行优化。Lovasz loss根据分割目标的类别数量可分为两种:lovasz hinge loss和lovasz softmax loss. 
其中lovasz hinge loss适用于二分类问题,lovasz softmax loss适用于多分类问题。该工作发表在CVPR 2018上,可点击[参考文献](#参考文献)查看具体原理。 +Lovasz loss基于子模损失(submodular losses)的凸Lovasz扩展,对神经网络的mean IoU损失进行优化。Lovasz loss根据分割目标的类别数量可分为两种:lovasz hinge loss和lovasz softmax loss. 其中lovasz hinge loss适用于二分类问题,lovasz softmax loss适用于多分类问题。该工作发表在CVPR 2018上,可点击[参考文献](https://openaccess.thecvf.com/content_cvpr_2018/html/Berman_The_LovaSz-Softmax_Loss_CVPR_2018_paper.html)查看具体原理。 ## Lovasz loss使用指南 diff --git a/docs/module/loss/lovasz_loss_en.md b/docs/module/loss/lovasz_loss_en.md new file mode 100644 index 0000000000..845143433d --- /dev/null +++ b/docs/module/loss/lovasz_loss_en.md @@ -0,0 +1,125 @@ +# Lovasz loss + +In image segmentation tasks, uneven distribution of categories often occurs, such as flaw detection of industrial products, road extraction, and diseased area extraction. We can use lovasz loss to solve this problem. + +Lovasz loss is based on the convex Lovasz extension of submodular losses, which optimizes the mean IoU loss of the neural network. Lovasz loss can be divided into two types according to the number of categories of the segmentation target: lovasz hinge loss and lovasz softmax loss. Among them, lovasz hinge loss is suitable for two classification problems, and lovasz softmax loss is suitable for multi-classification problems. This work was published on CVPR 2018, and you can click [References](#references) to view the specific principles. + + +## LovaszLoss usage guidance +Next, we will tell you how to use lovasz loss for training. It should be noted that the usual direct training method does not work, we recommend two other training methods: +-(1) Used in combination with cross entropy loss or bce loss (binary cross-entropy loss) weighting. +-(2) First use cross entropy loss or bce loss for training, and then use lovasz softmax loss or lovasz hinge loss for finetuning. + +Taking method (1) as an example, the loss function during training is selected through the `MixedLoss` class, and different losses are weighted through the `coef` parameter, so as to flexibly adjust the training parameters. As follows: + +```yaml +loss: + types: + - type: MixedLoss + losses: + - type: CrossEntropyLoss + - type: LovaszSoftmaxLoss + coef: [0.8, 0.2] +``` + +```yaml +loss: + types: + - type: MixedLoss + losses: + - type: CrossEntropyLoss + - type: LovaszHingeLoss + coef: [1, 0.02] +``` + + +## Lovasz softmax loss experimental comparison + +Next, take the classic [Cityscapes](https://www.cityscapes-dataset.com/) dataset as an example to apply lovasz softmax loss. The Cityscapes dataset has 19 categories of targets, and the categories are not balanced, such as the category `road `And `building` are very common, while `fence`, `motocycle`, and `wall` are relatively rare. We compared lovasz softmax loss with softmax loss experimentally. The OCRNet model is used here, and the backbone is HRNet w18. + + +* Data preparation + +Please see the data set preparation tutorial. + +* Lovasz loss training +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 python -u -m paddle.distributed.launch train.py \ +--config configs/ocrnet/ocrnet_hrnetw18_cityscapes_1024x512_160k_lovasz_softmax.yml \ +--use_vdl --num_workers 3 --do_eval +``` + +* Cross entropy loss training +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 python -u -m paddle.distributed.launch train.py \ +--config configs/ocrnet/ocrnet_hrnetw18_cityscapes_1024x512_160k.yml \ +--use_vdl --num_workers 3 --do_eval +``` + +* Comparison of results + +The experimental mIoU curve is shown in the figure below. 
+![img](../images/Lovasz_Softmax_Evaluate_mIoU.png) + + + + +The blue curve in the figure represents lovasz softmax loss + cross entropy loss, and the green curve represents cross entropy loss, which is an increase of 1%. + +It can be seen that after using lovasz softmax loss, the accuracy curve is basically higher than the original accuracy. + + +|Loss|best mIoU| +|-|-| +|cross entropy loss|80.46%| +|lovasz softmax loss + cross entropy loss|81.53%| + +## Lovasz hinge loss experimental comparison + +We take the road extraction task as an example to apply lovasz hinge loss. +Based on the MiniDeepGlobeRoadExtraction data set and cross entropy loss, an experiment was compared. +This data set comes from the Road Extraction single item of [DeepGlobe CVPR2018 Challenge](http://deepglobe.org/), and the training data roads account for 4.5%. Roads have a small proportion in the entire picture, which is a typical category Unbalanced scene. The picture sample is as follows: +![img](../images/deepglobe.png) + + +The OCRNet model is used here, and the backbone is HRNet w18. + +* Dataset +We randomly selected 800 pictures from the training set of the Road Extraction of the DeepGlobe competition as the training set and 200 pictures as the validation set. +Created a small road extraction data set [MiniDeepGlobeRoadExtraction](https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip). +Running the training script will automatically download the data set. + +* Lovasz loss training +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 python -u -m paddle.distributed.launch train.py \ +--config configs/ocrnet/ocrnet_hrnetw18_road_extraction_768x768_15k_lovasz_hinge.yml \ +--use_vdl --num_workers 3 --do_eval +``` + +* Cross entropy loss training +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 python -u -m paddle.distributed.launch train.py \ +--config configs/ocrnet/ocrnet_hrnetw18_road_extraction_768x768_15k.yml \ +--use_vdl --num_workers 3 --do_eval +``` + +* Comparison of results + +The experimental mIoU curve is shown in the figure below. +![img](../images/Lovasz_Hinge_Evaluate_mIoU.png) + + + + +The purple curve in the figure is lovasz hinge loss + cross entropy loss, and the blue curve is cross entropy loss, which is 0.5% higher than that. + +It can be seen that after using lovasz hinge loss, the accuracy curve is overall higher than the original accuracy. + +|Loss|best mIoU| +|-|-| +|cross entropy loss|78.69%| +|lovasz softmax loss + cross entropy loss|79.18%| + + +## References +[Berman M, Rannen Triki A, Blaschko M B. The lovász-softmax loss: a tractable surrogate for the optimization of the intersection-over-union measure in neural networks[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 
2018: 4413-4421.](http://openaccess.thecvf.com/content_cvpr_2018/html/Berman_The_LovaSz-Softmax_Loss_CVPR_2018_paper.html) diff --git a/docs/predict/color_map/after_mapped.jpeg b/docs/predict/color_map/after_mapped.jpeg new file mode 100644 index 0000000000..dcad6d2bc5 Binary files /dev/null and b/docs/predict/color_map/after_mapped.jpeg differ diff --git a/docs/predict/color_map/before_mapped.jpeg b/docs/predict/color_map/before_mapped.jpeg new file mode 100644 index 0000000000..6cb0b54623 Binary files /dev/null and b/docs/predict/color_map/before_mapped.jpeg differ diff --git a/docs/predict/predict.md b/docs/predict/predict.md new file mode 100644 index 0000000000..48df80a49a --- /dev/null +++ b/docs/predict/predict.md @@ -0,0 +1,143 @@ +English|[简体中文](predict_cn.md) + +# Prediction + +In addition to analyzing the `IOU`, `ACC` and `Kappa`, we can also check the segmentation effect of some specific samples, and inspire further optimization ideas from Bad Case. + +The `predict.py` script is specially used to visualize prediction cases. The command format is as follows: + +``` +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path dataset/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result +``` + +Among them, `image_path` can also be a directory. At this time, all the images in the directory will be predicted and the visualization results will be saved. + +Similarly, you can use `--aug_pred` to turn on multi-scale flip prediction, and `--is_slide` to turn on sliding window prediction. + + +## 1.Prepare Dataset + +- When performing prediction, only the original image is needed. You should prepare the contents of `test.txt` as follows: + ``` + images/image1.jpg + images/image2.jpg + ... + ``` + +- When calling `predict.py` for visualization, annotated images can be included in the file list. When predicting, the model will automatically ignore the annotated images given in the file list. Therefore, you can also directly use the training and validating datasets to do predictions. In other words, if the content of your `train.txt` is as follows: + ``` + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... + ``` + +* At this point, you can specify `image_list` as `train.txt` and `image_dir` as the directory where the training data is located when predicting. The robustness of PaddleSeg allows you to do this, and the output will be the prediction result of the **original training data**. 
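As a minimal illustration of how such a file list feeds into prediction, the sketch below reads a PaddleSeg-style list file and returns the image paths, dropping any label column just as prediction ignores annotated images. The helper name and the example paths are illustrative, not part of the PaddleSeg API.

```python
import os

def load_image_list(list_file, image_dir):
    """Read a file list (one image path per line, optional label column)
    and return full image paths; the label column, if present, is ignored."""
    image_list = []
    with open(list_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            image_path = line.split()[0]  # keep only the image column
            image_list.append(os.path.join(image_dir, image_path))
    return image_list

# Example with hypothetical paths:
# image_list = load_image_list("dataset/optic_disc_seg/train.txt",
#                              "dataset/optic_disc_seg")
```

The resulting list can be passed as the `image_list` argument of the prediction API described in the next section.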
+ +## 2.API +Parameter Analysis of Forecast API + +``` +paddleseg.core.predict( + model, + model_path, + transforms, + image_list, + image_dir=None, + save_dir='output', + aug_pred=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + custom_color=None +) +``` + +- Parameters + +| Parameter | Type | Effection | Is Required | Default | +| --------------- | ----------------- | ---------------------------------------------------- | ---------- | -------- | +| model | nn.Layer | Segmentation model | Yes | - | +| model_path | str | The path of parameters in best model | Yes | - | +| transforms | transform.Compose | Preprocess the input image | Yes | - | +| image_list | list | List of image paths to be predicted | Yes | - | +| image_dir | str | The directory of the image path to be predicted | No | None | +| save_dir | str | Output directory | No | 'output' | +| aug_pred | bool | Whether to use multi-scale and flip augmentation for prediction | No | False | +| scales | list/float | Set the zoom factor, take effect when aug_pred is True | No | 1.0 | +| flip_horizontal | bool | Whether to use horizontal flip, take effect when `aug_eval` is True | No | True | +| flip_vertical | bool | Whether to use vertical flip, take effect when `aug_eval` is True | No | False | +| is_slide | bool | Whether to evaluate through a sliding window | No | False | +| stride | tuple/list | Set the width and height of the sliding window, effective when `is_slide` is True | No | None | +| crop_size | tuple/list | Set the width and height of the crop of the sliding window, which takes effect when `is_slide` is True | No | None | +| custom_color | list | Set custom segmentation prediction colors,len(custom_color) = 3 * (pixel classes) | No | Default color map | + +Import the API interface and start predicting. + +``` +from paddleseg.core import predict +predict( + model, + model_path='output/best_model/model.pdparams',# Model path + transforms=transforms, # Transform.Compose, Preprocess the input image + image_list=image_list, # List of image paths to be predicted。 + image_dir=image_dir, # The directory where the picture to be predicted is located + save_dir='output/results' # Output path + ) +``` + +## 3.Instruction of File Structure +If you don't specify the output location, `added_prediction` and `pseudo_color_prediction` will be generated under the default folder `output/results`, which store the results of the pseudo map and blended prediction respectively. + + output/result + | + |--added_prediction + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--pseudo_color_prediction + | |--image1.jpg + | |--image2.jpg + | |--... + + +## 4.Custom Color Map +After prediction, what we get is the predicted segmentation result depend on default color map. Take the medical video disc segmentation dataset as an example: +![](./color_map/before_mapped.jpeg) + +​ pseudo map / blended prediction + +In the segmentation result, the foreground is marked in red, and the background is marked in black. 
If you want to use other colors, you can refer to the following command: +```python +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path data/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result \ + --custom_color 0 0 0 255 255 255 +``` +The segmentation results are as follows: +![](./color_map/after_mapped.jpeg) + +​ pseudo map / blended prediction + +- Parameters +- You can see that we added `--custom_color 0 0 0 255 255 255` at the end. What does this mean? In the RGB image, the final color of each pixel is determined by the components of the three RGB channels, so every three digits after the command line parameter represent the color of one pixel class, and the color of each class in `label.txt` corresponds to 3 digits in sequence. +- If you use a custom color map, the number of input `color values` should be equal to `3 * pixel classes` (depending on the dataset you are using). For example, if your dataset has 3 types of pixels, you can run: +```python +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path data/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result \ + --custom_color 0 0 0 100 100 100 200 200 200 +``` + +We recommend that you refer to the RGB color value comparison table to set `--custom_color`. diff --git a/docs/predict/predict_cn.md b/docs/predict/predict_cn.md new file mode 100644 index 0000000000..27d447afd4 --- /dev/null +++ b/docs/predict/predict_cn.md @@ -0,0 +1,142 @@ +简体中文|[English](predict.md) +# 预测 + +除了分析模型的IOU、ACC和Kappa指标之外,我们还可以查看一些具体样本的分割效果,从Bad Case启发进一步优化的思路。 + +predict.py脚本是专门用来可视化预测案例的,命令格式如下所示: + +``` +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path dataset/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result +``` + +其中`image_path`也可以是一个目录,这时候将对目录内的所有图片进行预测并保存可视化结果图。 + +同样的,可以通过`--aug_pred`开启多尺度翻转预测, `--is_slide`开启滑窗预测。 + + +## 1.准备预测数据 + +- 在执行预测时,仅需要原始图像。你应该准备好 `test.txt` 的内容,如下所示: + ``` + images/image1.jpg + images/image2.jpg + ... + ``` + +- 在调用`predict.py`进行可视化展示时,文件列表中可以包含标注图像。在预测时,模型将自动忽略文件列表中给出的标注图像。因此,你也可以直接使用训练、验证数据集进行预测。也就是说,如果你的`train.txt`的内容如下: + ``` + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... 
+ ``` + +* 此时你可以在预测时将`image_list`指定为`train.txt`,将`image_dir`指定为训练数据所在的目录。PaddleSeg的鲁棒性允许你这样做,输出的结果将是对**原始训练数据**的预测结果。 + +## 2.预测函数API +预测API的参数解析 + +``` +paddleseg.core.predict( + model, + model_path, + transforms, + image_list, + image_dir=None, + save_dir='output', + aug_pred=False, + scales=1.0, + flip_horizontal=True, + flip_vertical=False, + is_slide=False, + stride=None, + crop_size=None, + custom_color=None +) +``` + +- 参数说明如下 + +| 参数名 | 数据类型 | 用途 | 是否必选项 | 默认值 | +| --------------- | ----------------- | ---------------------------------------------------- | ---------- | -------- | +| model | nn.Layer | 分割模型 | 是 | - | +| model_path | str | 训练最优模型的路径 | 是 | - | +| transforms | transform.Compose | 对输入图像进行预处理 | 是 | - | +| image_list | list | 待预测的图像路径列表 | 是 | - | +| image_dir | str | 待要预测的图像路径目录 | 否 | None | +| save_dir | str | 结果输出路径 | 否 | 'output' | +| aug_pred | bool | 是否使用多尺度和翻转增广进行预测 | 否 | False | +| scales | list/float | 设置缩放因子,`aug_pred`为True时生效 | 否 | 1.0 | +| flip_horizontal | bool | 是否使用水平翻转,`aug_eval`为True时生效 | 否 | True | +| flip_vertical | bool | 是否使用垂直翻转,`aug_eval`为True时生效 | 否 | False | +| is_slide | bool | 是否通过滑动窗口进行评估 | 否 | False | +| stride | tuple/list | 设置滑动窗宽的宽度和高度,`is_slide`为True时生效 | 否 | None | +| crop_size | tuple/list | 设置滑动窗口的裁剪的宽度和高度,`is_slide`为True时生效 | 否 | None | +| custom_color | list | 设置自定义分割预测颜色,len(custom_color) = 3 * 像素种类 | 否 | 预设color map | + +导入API接口,开始预测 + +``` +from paddleseg.core import predict +predict( + model, + model_path='output/best_model/model.pdparams',# 模型路径 + transforms=transforms, #transform.Compose, 对输入图像进行预处理 + image_list=image_list, #list,待预测的图像路径列表。 + image_dir=image_dir, #str,待预测的图片所在目录 + save_dir='output/results' #str,结果输出路径 + ) +``` + +## 3.输出文件说明 +如果你不指定输出位置,在默认文件夹`output/results`下将生成两个文件夹`added_prediction`与`pseudo_color_prediction`, 分别存放叠加效果图与预测mask的结果。 + + output/result + | + |--added_prediction + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--pseudo_color_prediction + | |--image1.jpg + | |--image2.jpg + | |--... 
+ + +## 4.自定义color map +经过预测后,我们得到的是默认color map配色的预测分割结果。以视盘分割为例: +![](./color_map/before_mapped.jpeg) + +​ 伪彩色标注图/叠加图 + +在该分割结果中,前景以红色标明,背景以黑色标明。如果你想要使用其他颜色,可以参考如下命令: +```python +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path data/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result \ + --custom_color 0 0 0 255 255 255 +``` +分割预测结果如下: +![](./color_map/after_mapped.jpeg) + +​ 伪彩色标注图/叠加图 + +- 参数解析 +- 可以看到我们在最后添加了 `--custom_color 0 0 0 255 255 255`,这是什么意思呢?在RGB图像中,每个像素最终呈现出来的颜色是由RGB三个通道的分量共同决定的,因此该命令行参数后每三位代表一种像素的颜色,位置与`label.txt`中各类像素点一一对应。 +- 如果使用自定义color map,输入的`color值`的个数应该等于`3 * 像素种类`(取决于你所使用的数据集)。比如,你的数据集有 3 种像素,则可考虑执行: +```python +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path data/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result \ + --custom_color 0 0 0 100 100 100 200 200 200 +``` + +我们建议你参照RGB颜色数值对照表来设置`--custom_color`。 diff --git a/docs/release_notes.md b/docs/release_notes.md index 1f22e10e06..34d93f49cb 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -59,7 +59,7 @@ English | [简体中文](release_notes_cn.md) * 2020.05.12 **`v0.5.0`** - * 全面升级[HumanSeg人像分割模型](../contrib/HumanSeg),新增超轻量级人像分割模型HumanSeg-lite支持移动端实时人像分割处理,并提供基于光流的视频分割后处理提升分割流畅性。 + * 全面升级[HumanSeg人像分割模型](../contrib/PP-HumanSeg),新增超轻量级人像分割模型HumanSeg-lite支持移动端实时人像分割处理,并提供基于光流的视频分割后处理提升分割流畅性。 * 新增[气象遥感分割方案](../contrib/RemoteSensing),支持积雪识别、云检测等气象遥感场景。 * 新增[Lovasz Loss](lovasz_loss.md),解决数据类别不均衡问题。 * 使用VisualDL 2.0作为训练可视化工具 diff --git a/docs/release_notes_cn.md b/docs/release_notes_cn.md index f6636487b4..0c756cb5d5 100644 --- a/docs/release_notes_cn.md +++ b/docs/release_notes_cn.md @@ -58,7 +58,7 @@ * 2020.05.12 **`v0.5.0`** - * 全面升级[HumanSeg人像分割模型](../contrib/HumanSeg),新增超轻量级人像分割模型HumanSeg-lite支持移动端实时人像分割处理,并提供基于光流的视频分割后处理提升分割流畅性。 + * 全面升级[HumanSeg人像分割模型](../contrib/PP-HumanSeg),新增超轻量级人像分割模型HumanSeg-lite支持移动端实时人像分割处理,并提供基于光流的视频分割后处理提升分割流畅性。 * 新增[气象遥感分割方案](../contrib/RemoteSensing),支持积雪识别、云检测等气象遥感场景。 * 新增[Lovasz Loss](lovasz_loss.md),解决数据类别不均衡问题。 * 使用VisualDL 2.0作为训练可视化工具 diff --git a/docs/slim/quant/quant.md b/docs/slim/quant/quant.md index b502df02e3..f7a2757cee 100644 --- a/docs/slim/quant/quant.md +++ b/docs/slim/quant/quant.md @@ -78,7 +78,7 @@ python setup.py install ```shell # 设置1张可用的GPU卡 -export CUDA_VISIBLE_DEVICES=0 +export CUDA_VISIBLE_DEVICES=0 # windows下请执行以下命令 # set CUDA_VISIBLE_DEVICES=0 @@ -164,9 +164,42 @@ python slim/quant/qat_export.py \ 得到量化预测模型后,我们可以进行部署应用,请参考如下教程。 +<<<<<<< HEAD +<<<<<<< HEAD +得到量化预测模型后,我们可以进行部署应用。 +* [Paddle Inference Python部署](../../deployment/inference/python_inference.md) +* [Paddle Inference C++部署](../../deployment/inference/cpp_inference.md) +======= +得到量化预测模型后,我们可以直接进行部署应用,相关教程请参考: +* [Paddle Inference部署](../../deployment/inference/inference.md) +>>>>>>> release/2.2 +* [PaddleLite部署](../../deployment/lite/lite.md) + +## 4 量化加速比 + +测试环境: +* GPU: V100 +* CPU: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz +* CUDA: 10.2 +* cuDNN: 7.6 +* TensorRT: 6.0.1.5 + +测试方法: +1. 运行耗时为纯模型预测时间,测试图片cityspcaes(1024x2048) +2. 预测10次作为热启动,连续预测50次取平均得到预测时间 +3. 
使用GPU + TensorRT测试 + +|模型|未量化运行耗时(ms)|量化运行耗时(ms)|加速比| +|-|-|-|-| +|deeplabv3_resnet50_os8|204.2|150.1|26.49%| +|deeplabv3p_resnet50_os8|147.2|89.5|39.20%| +|gcnet_resnet50_os8|201.8|126.1|37.51%| +|pspnet_resnet50_os8|266.8|206.8|22.49%| +======= * [Paddle Inference Python部署](../../deployment/inference/python_inference.md) * [Paddle Inference C++部署](../../deployment/inference/cpp_inference.md) * [PaddleLite部署](../../deployment/lite/lite.md) +>>>>>>> 515bf9392c88ab1c96a335f80178aa602599e19d ### 3.4 参考资料 diff --git a/docs/solution/human/human.md b/docs/solution/human/human.md index 18aa09c582..76713ffb9a 100644 --- a/docs/solution/human/human.md +++ b/docs/solution/human/human.md @@ -1,6 +1,6 @@ -# 人像分割HumanSeg +# 人像分割 PP-HumanSeg -本教程基于PaddleSeg提供高精度人像分割模型,从训练到部署的全流程应用指南,以及视频流人像分割、背景替换的实际效果体验。最新发布超轻量级人像分割模型,支持Web端、移动端场景的实时分割。[代码链接](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.1/contrib/HumanSeg) +本教程基于PaddleSeg提供高精度人像分割模型,从训练到部署的全流程应用指南,以及视频流人像分割、背景替换的实际效果体验。最新发布超轻量级人像分割模型,支持Web端、移动端场景的实时分割。[代码链接](../../../contrib/PP-HumanSeg) - [人像分割模型](#人像分割模型) - [安装](#安装) @@ -13,13 +13,13 @@ - [移动端部署](#移动端部署) ## 人像分割模型 -HumanSeg开放了在大规模人像数据上训练的三个人像模型,满足服务端、移动端、Web端多种使用场景的需求。 +PP-HumanSeg开放了在大规模人像数据上训练的三个人像模型,满足服务端、移动端、Web端多种使用场景的需求。 | 模型类型 | 适用场景 | Checkpoint | Inference Model | | --- | --- | --- | ---| -| 高精度模型 | 适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/ResNet50, 输入大小(512, 512) |[humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip) | [humanseg_server_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip) | -| 轻量级模型 | 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) | [humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/fcn_hrnetw18_small_v1_humanseg_192x192.zip) | [humanseg_mobile_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax.zip) | -| 超轻量级模型 | 适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为优化的ShuffleNetV2,输入大小(192, 192) | [humanseg_lite_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/shufflenetv2_humanseg_192x192.zip) | [humanseg_lite_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/shufflenetv2_humanseg_192x192_with_softmax.zip) | +| 高精度模型 | 适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/ResNet50, 输入大小(512, 512) |[pp_humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip) | [pp_humanseg_server_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip) | +| 轻量级模型 | 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) | [pp_humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/fcn_hrnetw18_small_v1_humanseg_192x192.zip) | [pp_humanseg_mobile_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax.zip) | +| 超轻量级模型 | 适用于Web端或移动端实时分割场景,例如手机自拍、Web视频会议,模型结构为优化的ShuffleNetV2,输入大小(192, 192) | [pp_humanseg_lite_ckpt](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/shufflenetv2_humanseg_192x192.zip) | [pp_humanseg_lite_inference](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/shufflenetv2_humanseg_192x192_with_softmax.zip) | **NOTE:** * 其中Checkpoint为模型权重,用于Fine-tuning场景。 @@ -65,9 +65,9 @@ git clone https://github.com/PaddlePaddle/PaddleSeg ``` ## 快速体验 
-以下所有命令均在`PaddleSeg/contrib/HumanSeg`目录下执行。 +以下所有命令均在`PaddleSeg/contrib/PP-HumanSeg`目录下执行。 ```shell -cd PaddleSeg/contrib/HumanSeg +cd PaddleSeg/contrib/PP-HumanSeg ``` ### 下载Inference Model diff --git a/docs/train/train.md b/docs/train/train.md index 1ba83b960e..6719a36496 100644 --- a/docs/train/train.md +++ b/docs/train/train.md @@ -1,10 +1,14 @@ -# 模型训练 +English|[简体中文](train_cn.md) +# Model Training + +## 1、Start Training + +We can train the model through the script provided by PaddleSeg. Here we use `BiseNet` model and `optic_disc` dataset to show the training process. Please make sure that you have already installed PaddleSeg, and it is located in the PaddleSeg directory. Then execute the following script: -我们可以通过PaddleSeg提供的脚本对模型进行训练,请确保完成了PaddleSeg的安装工作,并且位于PaddleSeg目录下,执行以下脚本: ```shell -export CUDA_VISIBLE_DEVICES=0 # 设置1张可用的卡 -# windows下请执行以下命令 +export CUDA_VISIBLE_DEVICES=0 # Set 1 usable card +# If you are using windows, please excute following script: # set CUDA_VISIBLE_DEVICES=0 python train.py \ --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ @@ -14,26 +18,29 @@ python train.py \ --save_dir output ``` -### 训练参数解释 +### Parameters + +| Parameter | Effection | Is Required | Default | +| :------------------ | :----------------------------------------------------------- | :--------- | :--------------- | +| iters | Number of training iterations | No | The value specified in the configuration file.| | +| batch_size | Batch size on a single card | No | The value specified in the configuration file.| | +| learning_rate | Initial learning rate | No | The value specified in the configuration file.| | +| config | Configuration files | Yes | - | +| save_dir | The root path for saving model and visualdl log files | No | output | +| num_workers | The number of processes used to read data asynchronously, when it is greater than or equal to 1, the child process is started to read dat | No | 0 | +| use_vdl | Whether to enable visualdl to record training data | No | No | +| save_interval | Number of steps between model saving | No | 1000 | +| do_eval | Whether to start the evaluation when saving the model, the best model will be saved to best_model according to mIoU at startup | No | No | +| log_iters | Interval steps for printing log | No | 10 | +| resume_model | Restore the training model path, such as: `output/iter_1000` | No | None | +| keep_checkpoint_max | Number of latest models saved | No | 5 | -| 参数名 | 用途 | 是否必选项 | 默认值 | -| ------------------- | ------------------------------------------------------------ | ---------- | ---------------- | -| iters | 训练迭代次数 | 否 | 配置文件中指定值 | -| batch_size | 单卡batch size | 否 | 配置文件中指定值 | -| learning_rate | 初始学习率 | 否 | 配置文件中指定值 | -| config | 配置文件 | 是 | - | -| save_dir | 模型和visualdl日志文件的保存根路径 | 否 | output | -| num_workers | 用于异步读取数据的进程数量, 大于等于1时开启子进程读取数据 | 否 | 0 | -| use_vdl | 是否开启visualdl记录训练数据 | 否 | 否 | -| save_interval_iters | 模型保存的间隔步数 | 否 | 1000 | -| do_eval | 是否在保存模型时启动评估, 启动时将会根据mIoU保存最佳模型至best_model | 否 | 否 | -| log_iters | 打印日志的间隔步数 | 否 | 10 | -| resume_model | 恢复训练模型路径,如:`output/iter_1000` | 否 | None | +## 2、Multi-card training +If you want to use multi-card training, you need to specify the environment variable `CUDA_VISIBLE_DEVICES` as `multi-card` (if not specified, all GPUs will be used by default), and use `paddle.distributed.launch` to start the training script (Can not use multi-card training under Windows, because it doesn't support nccl): 
-**注意**:如果想要使用多卡训练的话,需要将环境变量CUDA_VISIBLE_DEVICES指定为多卡(不指定时默认使用所有的gpu),并使用paddle.distributed.launch启动训练脚本(windows下由于不支持nccl,无法使用多卡训练): ```shell -export CUDA_VISIBLE_DEVICES=0,1,2,3 # 设置4张可用的卡 +export CUDA_VISIBLE_DEVICES=0,1,2,3 # Set 4 usable cards python -m paddle.distributed.launch train.py \ --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ --do_eval \ @@ -42,7 +49,7 @@ python -m paddle.distributed.launch train.py \ --save_dir output ``` -恢复训练: +## 3、Resume Training: ```shell python train.py \ --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ @@ -53,21 +60,21 @@ python train.py \ --save_dir output ``` -## 训练可视化 +## 4、Visualize Training Process -PaddleSeg会将训练过程中的数据写入VisualDL文件,并实时的查看训练过程中的日志,记录的数据包括: -1. loss变化趋势 -2. 学习率变化趋势 -3. 训练时间 -4. 数据读取时间 -5. mean IoU变化趋势(当打开了`do_eval`开关后生效) -6. mean pixel Accuracy变化趋势(当打开了`do_eval`开关后生效) +PaddleSeg will write the data during the training process into the VisualDL file, and view the log during the training process in real time. The recorded data includes: +1. Loss change trend. +2. Changes in learning rate. +3. Training time. +4. Data reading time. +5. Mean IoU trend (takes effect when the `do_eval` switch is turned on). +6. Trend of mean pixel Accuracy (takes effect when the `do_eval` switch is turned on). -使用如下命令启动VisualDL查看日志 +Run the following command to start VisualDL to view the log ```shell -# 下述命令会在127.0.0.1上启动一个服务,支持通过前端web页面查看,可以通过--host这个参数指定实际ip地址 +# The following command will start a service on 127.0.0.1, which supports viewing through the front-end web page. You can specify the actual ip address through the --host parameter visualdl --logdir output/ ``` -在浏览器输入提示的网址,效果如下: +Enter the suggested URL in the browser, the effect is as follows: ![](../images/quick_start_vdl.jpg) diff --git a/docs/train/train_cn.md b/docs/train/train_cn.md new file mode 100644 index 0000000000..1290f23c06 --- /dev/null +++ b/docs/train/train_cn.md @@ -0,0 +1,77 @@ +简体中文|[English](train.md) +# 模型训练 + +## 1、开启训练 +我们可以通过PaddleSeg提供的脚本对模型进行训练,在本文档中我们使用`BiseNet`模型与`optic_disc`数据集展示训练过程。 请确保已经完成了PaddleSeg的安装工作,并且位于PaddleSeg目录下,执行以下脚本: + +```shell +export CUDA_VISIBLE_DEVICES=0 # 设置1张可用的卡 +# windows下请执行以下命令 +# set CUDA_VISIBLE_DEVICES=0 +python train.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +### 训练参数解释 + +| 参数名 | 用途 | 是否必选项 | 默认值 | +| :------------------ | :----------------------------------------------------------- | :--------- | :--------------- | +| iters | 训练迭代次数 | 否 | 配置文件中指定值 | +| batch_size | 单卡batch size | 否 | 配置文件中指定值 | +| learning_rate | 初始学习率 | 否 | 配置文件中指定值 | +| config | 配置文件 | 是 | - | +| save_dir | 模型和visualdl日志文件的保存根路径 | 否 | output | +| num_workers | 用于异步读取数据的进程数量, 大于等于1时开启子进程读取数据 | 否 | 0 | +| use_vdl | 是否开启visualdl记录训练数据 | 否 | 否 | +| save_interval | 模型保存的间隔步数 | 否 | 1000 | +| do_eval | 是否在保存模型时启动评估, 启动时将会根据mIoU保存最佳模型至best_model | 否 | 否 | +| log_iters | 打印日志的间隔步数 | 否 | 10 | +| resume_model | 恢复训练模型路径,如:`output/iter_1000` | 否 | None | +| keep_checkpoint_max | 最新模型保存个数 | 否 | 5 | + +## 2、多卡训练 +如果想要使用多卡训练的话,需要将环境变量CUDA_VISIBLE_DEVICES指定为多卡(不指定时默认使用所有的gpu),并使用paddle.distributed.launch启动训练脚本(windows下由于不支持nccl,无法使用多卡训练): + +```shell +export CUDA_VISIBLE_DEVICES=0,1,2,3 # 设置4张可用的卡 +python -m paddle.distributed.launch train.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +## 3、恢复训练: +```shell +python train.py \ + --config 
configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --resume_model output/iter_500 \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +## 4、训练可视化 + +PaddleSeg会将训练过程中的数据写入VisualDL文件,并实时的查看训练过程中的日志,记录的数据包括: +1. loss变化趋势 +2. 学习率变化趋势 +3. 训练时间 +4. 数据读取时间 +5. mean IoU变化趋势(当打开了`do_eval`开关后生效) +6. mean pixel Accuracy变化趋势(当打开了`do_eval`开关后生效) + +使用如下命令启动VisualDL查看日志 +```shell +# 下述命令会在127.0.0.1上启动一个服务,支持通过前端web页面查看,可以通过--host这个参数指定实际ip地址 +visualdl --logdir output/ +``` + +在浏览器输入提示的网址,效果如下: +![](../images/quick_start_vdl.jpg) diff --git a/docs/whole_process.md b/docs/whole_process.md new file mode 100644 index 0000000000..2ae8c72f81 --- /dev/null +++ b/docs/whole_process.md @@ -0,0 +1,545 @@ +English | [简体中文](whole_process_cn.md) +# Whole Process of PaddleSeg + +We will use `BiSeNetV2` and `Medical Video Disc Segmentation Dataset` as example to introduce PaddleSeg's **configurable driver**. If you want to know how to use API, you can click [PaddleSeg Advanced Tutorial](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0). + +The whole process is as follows: + +1. **Prepare the environment**: PaddleSeg's software environment. +2. **Data preparation**: How to prepare and organize custom datasets. +3. **Model training**: Training configuration and start training. +4. **Visualize the training process**: Use VDL to show the training process. +5. **Model evaluation**: Evaluate the model. +6. **Model prediction and visualization**: Use the trained model to make predictions and visualize the results at the same time. +7. **Model export**: How to export a model that can be deployed. +8. **Model deployment**: Quickly use Python to achieve efficient deployment. + +## **1. Environmental Installation and Verification** + +### **1.1 Environment Installation** + +Before using PaddleSeg to train an image segmentation model, users need to complete the following tasks: + +1. Install [Python3.6 or higher](https://www.python.org/downloads/). +2. Install the `PaddlePaddle 2.1` version, please refer to [Quick Installation](https://www.paddlepaddle.org.cn/install/quick) for the specific installation method. Due to the high computational cost of the image segmentation model, it is recommended to use PaddleSeg under the GPU version of PaddlePaddle. +3. Download the code library of PaddleSeg. + +``` +git clone https://github.com/PaddlePaddle/PaddleSeg.git +``` +``` +#If the github download network is poor, users can choose gitee to download +git clone https://gitee.com/paddlepaddle/PaddleSeg.git +``` +Install the PaddleSeg API library, while installing the library, other dependencies for running PaddleSeg are also installed at the same time +``` +pip install paddleseg +``` + +### **1.2 Confirm Installation** + +Run following commands in the PaddleSeg directory. + +Execute the following command, if the predicted result appears in the PaddleSeg/output folder, the installation is successful. + +``` +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path https://bj.bcebos.com/paddleseg/dygraph/optic_disc/bisenet_optic_disc_512x512_1k/model.pdparams\ + --image_path docs/images/optic_test_image.jpg \ + --save_dir output/result +``` + +## **2. Dataset Preparation** + +**Dataset Download** + +This chapter will use the `optic disc segmentation dataset` for training. Optic disc segmentation is a set of fundus medical segmentation datasets, including 267 training images, 76 verification images, and 38 test images. 
You can download them by the following command. + +The original image and segmentation result are shown below. Our task will be to segment the optic disc area in the eyeball picture. + +![](./images/fig1.png) + +​ Figure 1: Original image and segmentation result + + + +``` +# Download and unzip the dataset +mkdir dataset +cd dataset +wget https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip +unzip optic_disc_seg.zip +cd .. +``` + +### **2.1 Prepare the Dataset** + +How to use your own dataset for training is the most concerned thing for developers. Below we will focus on explaining what we should prepare if we want to customize the dataset.And we will tell you how to make corresponding changes in the configuration file after the dataset is ready. + +### **2.1.1 Organize the Dataset** + +- It is recommended to organize into the following structure. + + custom_dataset + | + |--images + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--labels + | |--label1.png + | |--label2.png + | |--... + | + |--train.txt + | + |--val.txt + | + |--test.txt + +- It is not necessary for the folder to be named custom_dataset, images, labels, and the user can name it independently. + +- The file in train.txt val.txt test.txt does not have to be in the same directory as the custom_dataset folder, it can be modified through the options in the configuration file. + + The contents of train.txt and val.txt are as follows: + + ``` + + images/image1.jpg labels/label1.png + images/image2.jpg labels/label2.png + ... + ``` + +The format of the dataset we just downloaded is similar (label.txt is optional). If users want to label and divide the dataset, please refer to [Data Marking Document](data/marker/marker.md) and [ dataset division document](data/custom/data_prepare.md). + +我们一般推荐用户将数据集放置在PaddleSeg下的data文件夹下。 + +## **3. Model Training** + +-Choose the BiseNetV2 model here. BiseNetV2 is a lightweight model with an average IoU of 72.6% in the Cityscapes test set and a speed of 156 FPS on an NVIDIA GeForce GTX 1080 Ti card, which is much faster than the existing method , And can achieve better segmentation accuracy. + +### **3.1 BiseNetV2 Model** + +BiSeNetV2 separates low-level network details and high-level semantic classification to achieve high-precision and high-efficiency real-time semantic segmentation. It is a trade-off between speed and accuracy. The architecture includes: + +(1) A detail branch, with shallow wide channels, used to capture low-level details and generate high-resolution feature representations. + +(2) A semantic branch with narrow channels and deep levels to obtain high-level semantic context. Semantic branch is lightweight because it reduces channel capacity and fast downsampling strategy. In addition, a guiding aggregation layer is designed to enhance the mutual connection and fusion of the two types of feature representation. In addition, an enhanced training strategy is also designed to improve segmentation performance without increasing any inference cost. + +![](./images/fig2.png) + +​ Figure 2: Original image and segmentation result + +### **3.2 Detailed Interpretation of Configuration Files** + +After understanding the principle of BiseNetV2, we can prepare for training. In the above, we talked about PaddleSeg providing **configurable driver** for model training. So before training, let’s take a look at the configuration file. Here we take `bisenet_optic_disc_512x512_1k.yml` as an example. 
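Before going through the file line by line, note that the same YAML can also be loaded programmatically. The short sketch below uses PaddleSeg's `Config` helper as found in the 2.x code base; the attribute names (`batch_size`, `iters`, `model`, `train_dataset`) are taken from that code and may differ between versions, so treat it as illustrative rather than authoritative.

```python
from paddleseg.cvlibs import Config

# Load the quick-start configuration and inspect a few of its fields.
cfg = Config('configs/quick_start/bisenet_optic_disc_512x512_1k.yml')

print(cfg.batch_size, cfg.iters)    # values defined in the YAML (4 and 1000 here)
model = cfg.model                   # the BiSeNetV2 network the file describes
train_dataset = cfg.train_dataset   # OpticDiscSeg with the configured transforms
```

This is essentially what `train.py` does internally before building the training loop, which is why editing the YAML is enough to change the whole experiment.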
The yaml format configuration file includes model type, backbone network, training and testing, pre-training dataset and supporting tools (such as Data augmentation) and other information. + +PaddleSeg lists every option that can be optimized in the configuration file. Users can customize the model by modifying this configuration file (**All configuration files are under the PaddleSeg/configs folder**), such as custom models The backbone network used, the loss function used by the model, and the configuration of the network structure. In addition to customizing the model, data processing strategies can be configured in the configuration file, such as data augmentation strategies such as resizing, normalization, and flipping. + +**Key Parameter:** + +-1: In the learning rate given in the PaddleSeg configuration file, except for the single-card learning rate in "bisenet_optic_disc_512x512_1k.yml", the rest of the configuration files are all 4-card learning rates. If the user is training with a single card, then learn The rate setting should become 1/4 of the original. +-2: The configuration file in PaddleSeg gives a variety of loss functions: CrossEntropy Loss, BootstrappedCrossEntropy Loss, Dice Loss, BCE Loss, OhemCrossEntropyLoss, RelaxBoundaryLoss, OhemEdgeAttentionLoss, Lovasz Hinge Loss, Lovasz Soft Loss, users can perform according to their own needs Change. + +``` +batch_size: 4 # Set the number of pictures sent to the network at one iteration. Generally speaking, the larger the video memory of the machine you are using, the higher the batch_size value. +iters: 1000 # Number of iterations + +train_dataset: # Training dataset + type: OpticDiscSeg # The name of the training dataset class + dataset_root: data/optic_disc_seg # The directory where the training dataset is stored + num_classes: 2 # Number of pixel categories + transforms: # Data transformation and data augmentation + - type: Resize Need to resize before sending to the network + target_size: [512, 512] # Resize the original image to 512*512 and send it to the network + - type: RandomHorizontalFlip # Flip the image horizontally with a certain probability + - type: Normalize # Normalize the image + mode: train + +val_dataset: # Validating dataset + type: OpticDiscSeg # The name of the training dataset class + dataset_root: data/optic_disc_seg # The directory where the validating dataset is stored + num_classes: 2 # Number of pixel categories + transforms: # Data transformation and data augmentation + - type: Resize Need to resize before sending to the network + target_size: [512, 512] # Resize the original image to 512*512 and send it to the network + - type: Normalize # Normalize the image + mode: val + +optimizer: # Set the type of optimizer + type: sgd #Using SGD (Stochastic Gradient Descent) method as the optimizer + momentum: 0.9 + weight_decay: 4.0e-5 # Weight attenuation, the purpose of use is to prevent overfitting + +lr_scheduler: # Related settings for learning rate + type: PolynomialDecay # A type of learning rate,a total of 12 strategies are supported + learning_rate: 0.01 + power: 0.9 + end_lr: 0 + +loss: # Set the type of loss function + types: + - type: CrossEntropyLoss # The type of loss function + coef: [1, 1, 1, 1, 1] + # BiseNetV2 has 4 auxiliary losses, plus a total of five main losses, 1 means weight all_loss = coef_1 * loss_1 + .... 
+ coef_n * loss_n + +model: # Model description + type: BiSeNetV2 # Set model category + pretrained: Null # Set the pretrained model of the model +``` +**FAQ** + +Q: Some readers may have questions, what kind of configuration items are designed in the configuration file, and what kind of configuration items are in the command line parameters of the script? + +A: The information related to the model scheme is in the configuration file, and it also includes data augmentation strategies for the original sample. In addition to the three common parameters of iters, batch_size, and learning_rate, the command line parameters only involve the configuration of the training process. In other words, the configuration file ultimately determines what model to use. + +### **3.3 Modify Configuration Files** + +When the user prepares the dataset, he can specify the location in the configuration file to modify the data path for further training + +Here, we take the "bisenet_optic_disc_512x512_1k.yml" file mentioned in the above article as an example, and select the data configuration part for your explanation. + +Mainly focus on these parameters: + +- The type parameter is Dataset, which represents the recommended data format; +- The dataset_root path contains the path where the label and image are located; in the example: dataset_root: dataset/optic_disc_seg +- train_path is the path of txt; in the example: train_path: dataset/optic_disc_seg/train_list.txt +- num_classes is the category (the background is also counted as a category); +- Transform is a strategy for data preprocessing, users can change according to their actual needs + +``` +train_dataset: + type: Dataset + dataset_root: dataset/optic_disc_seg + train_path: dataset/optic_disc_seg/train_list.txt + num_classes: 2 + transforms: + - type: Resize + target_size: [512, 512] + - type: RandomHorizontalFlip + - type: Normalize + mode: train + +val_dataset: + type: Dataset + dataset_root: dataset/optic_disc_seg + val_path: dataset/optic_disc_seg/val_list.txt + num_classes: 2 + transforms: + - type: Resize + target_size: [512, 512] + - type: Normalize + mode: val +``` + +### **3.4 Start Training** + +After we modify the corresponding configuration parameters, we can get started and experience the use + +``` +export CUDA_VISIBLE_DEVICES=0 # Set 1 usable card + +**Please execute the following command under windows** +**set CUDA_VISIBLE_DEVICES=0** +python train.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +-Result file + +``` +output + ├── iter_500 # Means to save the model once at 500 steps + ├── model.pdparams # Model parameters + └── model.pdopt # Optimizer parameters during training + ├── iter_1000 + ├── model.pdparams + └── model.pdopt + └── best_model # #During training, after training, add --do_eval, every time the model is saved, it will be evaled once, and the model with the highest miou will be saved as best_model + └── model.pdparams +``` + +### **3.5 Training Parameters** + +| Parameter | Effection | Is Required | Default | +| :------------------ | :----------------------------------------------------------- | :--------- | :--------------- | +| iters | Number of training iterations | No | The value specified in the configuration file.| | +| batch_size | Batch size on a single card | No | The value specified in the configuration file.| | +| learning_rate | Initial learning rate | No | The value specified in the configuration file.| | +| 
config | Configuration files | Yes | - | +| save_dir | The root path for saving model and visualdl log files | No | output | +| num_workers | The number of processes used to read data asynchronously, when it is greater than or equal to 1, the child process is started to read dat | No | 0 | +| use_vdl | Whether to enable visualdl to record training data | No | No | +| save_interval | Number of steps between model saving | No | 1000 | +| do_eval | Whether to do evaluation when saving the model, the best model will be saved according to mIoU | No | No | +| log_iters | Interval steps for printing log | No | 10 | +| resume_model | Restore the training model path, such as: `output/iter_1000` | No | None | +| keep_checkpoint_max | Number of latest models saved | No | 5 | + +### **3.6 In-depth Exploration of Configuration Files** + +- We just took out a BiSeNetV2 configuration file for everyone to experience how to configure the dataset. In this example, all the parameters are placed in a yml file, but the actual PaddleSeg configuration file is for better reuse For compatibility and compatibility, a more coupled design is adopted, that is, a model requires more than two configuration files to achieve. Below we will use DeeplabV3p as an example to illustrate the coupling settings of the configuration files. +- For example, if we want to change the configuration of the deeplabv3p_resnet50_os8_cityscapes_1024x512_80k.yml file, we will find that the file also depends on the (base) cityscapes.yml file. At this point, we need to open the cityscapes.yml file synchronously to set the corresponding parameters. + +![](./images/fig3.png) + + + +​ ​ Figure 3: In-depth exploration of configuration files + +In PaddleSeg2.0 mode, users can find that PaddleSeg adopts a more coupled configuration design, placing common configurations such as data, optimizer, and loss function under a single configuration file. When we try to change to a new network The structure is time, you only need to pay attention to model switching, which avoids the tedious rhythm of switching models to re-adjust these common parameters and avoid user errors. + +**FAQ** + +Q: There are some common parameters in multiple configuration files, so which one shall I prevail? + +A: As shown by the serial number in the figure, the parameters of the No. 1 yml file can cover the parameters of the No. 2 yml file, that is, the configuration file No. 1 is better than the No. 2. In addition, if the parameters appearing in the yaml file are specified in the command line, the configuration of the command line is better than the yaml file. 
(For example: adjust `batch_size` in the command line according to your machine configuration, no need to modify the preset yaml file in configs) + +### **3.7 Muti-card Training** + +**Note**: If you want to use multi-card training, you need to specify the environment variable `CUDA_VISIBLE_DEVICES` as `multi-card` (if not specified, all GPUs will be used by default), and use `paddle.distributed.launch` to start the training script (Can not use multi-card training under Windows, because it doesn't support nccl): + +``` +export CUDA_VISIBLE_DEVICES=0,1,2,3 # Set 4 usable cards +python -m paddle.distributed.launch train.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +### **3.8 Resume training** + +``` +python train.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --resume_model output/iter_500 \ + --do_eval \ + --use_vdl \ + --save_interval 500 \ + --save_dir output +``` + +## **4. Training Process Visualization** + +- In order to make our network training process more intuitive and analyze the network to get a better network faster, PaddlePaddle provides a visual analysis tool: VisualDL + +When the `use_vdl` switch is turned on, PaddleSeg will write the data during the training process into the VisualDL file, and you can view the log during the training process in real time. The recorded data includes: + +1. Loss change trend +2. Changes in learning rate +3. Training time +4. Data reading time +5. Mean IoU change trend (takes effect when the `do_eval` switch is turned on) +6. Change trend of mean pixel Accuracy (takes effect when the `do_eval` switch is turned on) + +Use the following command to start VisualDL to view the log + +``` +**The following command will start a service on 127.0.0.1, which supports viewing through the front-end web page, and the actual ip address can be specified through the --host parameter** + +visualdl --logdir output/ +``` + +Enter the suggested URL in the browser, the effect is as follows: + +![](./images/fig4.png) + +​ Figure 4: VDL effect demonstration + +## **5. Model Evaluation** + +After the training is completed, the user can use the evaluation script val.py to evaluate the effect of the model. Assuming that the number of iterations (iters) in the training process is 1000, the interval for saving the model is 500, that is, the training model is saved twice for every 1000 iterations of the dataset. Therefore, there will be a total of 2 regularly saved models, plus the best model best_model saved, there are a total of 3 models. You can specify the model file you want to evaluate through model_path. + +``` +python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams +``` + +If you want to perform multi-scale flip evaluation, you can turn it on by passing in `--aug_eval`, and then passing in scale information via `--scales`, `--flip_horizontal` turns on horizontal flip, and `flip_vertical` turns on vertical flip. Examples of usage are as follows: + +``` +python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --aug_eval \ + --scales 0.75 1.0 1.25 \ + --flip_horizontal +``` + +If you want to perform sliding window evaluation, you can open it by passing in `--is_slide`, pass in the window size by `--crop_size`, and pass in the step size by `--stride`. 
Examples of usage are as follows: + +``` +python val.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --is_slide \ + --crop_size 256 256 \ + --stride 128 128 +``` + +In the field of image segmentation, evaluating model quality is mainly judged by three indicators, `accuracy` (acc), `mean intersection over union` (mIoU), and `Kappa coefficient`. + +- **Accuracy**: refers to the proportion of pixels with correct category prediction to the total pixels. The higher the accuracy, the better the quality of the model. +- **Average intersection ratio**: perform inference calculations for each category dataset separately, divide the calculated intersection of the predicted area and the actual area by the union of the predicted area and the actual area, and then average the results of all categories. In this example, under normal circumstances, the mIoU index value of the model on the verification set will reach 0.80 or more. An example of the displayed information is shown below. The **mIoU=0.8526** in the third row is mIoU. +- **Kappa coefficient**: an index used for consistency testing, which can be used to measure the effect of classification. The calculation of the kappa coefficient is based on the confusion matrix, with a value between -1 and 1, usually greater than 0. The formula is as follows, P0P_0*P*0 is the accuracy of the classifier, and PeP_e*P**e* is the accuracy of the random classifier. The higher the Kappa coefficient, the better the model quality. + + + +With the running of the evaluation script, the final printed evaluation log is as follows. + +``` +... +2021-01-13 16:41:29 [INFO] Start evaluating (total_samples=76, total_iters=76)... +76/76 [==============================] - 2s 30ms/step - batch_cost: 0.0268 - reader cost: 1.7656e- +2021-01-13 16:41:31 [INFO] [EVAL] #Images=76 mIoU=0.8526 Acc=0.9942 Kappa=0.8283 +2021-01-13 16:41:31 [INFO] [EVAL] Class IoU: +[0.9941 0.7112] +2021-01-13 16:41:31 [INFO] [EVAL] Class Acc: +[0.9959 0.8886] +``` + +## **6.Prediction and Visualization** + +In addition to analyzing the IOU, ACC and Kappa indicators of the model, we can also check the cutting sample effect of some specific samples, and inspire further optimization ideas from Bad Case. + +The predict.py script is specially used to visualize prediction cases. The command format is as follows + +``` +python predict.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams \ + --image_path dataset/optic_disc_seg/JPEGImages/H0003.jpg \ + --save_dir output/result +``` + +Among them, `image_path` can also be a directory. At this time, all the pictures in the directory will be predicted and the visualization results will be saved. + +Similarly, you can use `--aug_pred` to turn on multi-scale flip prediction, and `--is_slide` to turn on sliding window prediction. + +We select 1 picture to view, the effect is as follows. We can intuitively see the difference between the cutting effect of the model and the original mark, thereby generating some optimization ideas, such as whether the cutting boundary can be processed in a regular manner. + +![](./images/fig5.png) + + + +​ ​ Figure 5: Prediction effect display + +## **7 Model Export** + +In order to facilitate the user's industrial-level deployment, PaddleSeg provides a one-click function of moving to static, which is to convert the trained dynamic graph model file into a static graph form. 
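Under the hood, "moving to static" relies on Paddle's dynamic-to-static facilities (`paddle.jit.to_static` and `paddle.jit.save`). The sketch below only illustrates the idea; the exact calls are based on the public Paddle / PaddleSeg 2.x APIs, and `export.py` takes care of additional details (reading the config, naming the outputs, writing `deploy.yaml`), so consider this a hedged sketch rather than a replacement for the script.

```python
import paddle
from paddle.static import InputSpec
from paddleseg.models import BiSeNetV2

# Rebuild the trained network and load its dynamic-graph weights.
model = BiSeNetV2(num_classes=2)
model.set_state_dict(paddle.load('output/iter_1000/model.pdparams'))
model.eval()

# Describe the input the exported graph should accept: NCHW float32 images.
spec = [InputSpec(shape=[None, 3, None, None], dtype='float32', name='x')]

# Trace the dynamic graph into a static graph and save
# model.pdmodel / model.pdiparams under the "output/model" prefix.
static_model = paddle.jit.to_static(model, input_spec=spec)
paddle.jit.save(static_model, 'output/model')
```

In normal use you do not need to write this yourself; the `export.py` script wraps the whole procedure: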
+ +``` +python export.py \ + --config configs/quick_start/bisenet_optic_disc_512x512_1k.yml \ + --model_path output/iter_1000/model.pdparams +``` + +- Parameters + +| Parameter | Effection | Is Required | Default | +| :--------- | :--------------------------------- | :--------- | :--------------- | +| config | Configuration file | Yes | - | +| save_dir | The root path for saving model and visualdl log files | No | output | +| model_path | Path of pretrained model parameters | No | The value specified in the configuration file. | + +``` +- Result Files + +output + ├── deploy.yaml # Deployment related configuration files + ├── model.pdiparams # Static graph model parameters + ├── model.pdiparams.info # Additional parameter information, generally don’t need attention + └── model.pdmodel # Static graph model files +``` + +## **8 Model Deploy** + +-PaddleSeg currently supports the following deployment methods: + +| Platform | Library | Tutorial | +| :----------- | :----------- | :----- | +| Python | Paddle prediction library | [e.g.](../deploy/python/) | +| C++ | Paddle prediction library | [e.g.](../deploy/cpp/) | +| Mobile | PaddleLite | [e.g.](../deploy/lite/) | +| Serving | HubServing | Comming soon | +| Front-end | PaddleJS | [e.g.](../deploy/web/) | + +``` +#Run the following command, an image of H0003.png will be generated under the output file +python deploy/python/infer.py \ +--config output/deploy.yaml\ +--image_path dataset/optic_disc_seg/JPEGImages/H0003.jpg\ +--save_dir output +``` + +- Parameters: + +|Parameter|Effection|Is required|Default| +|-|-|-|-| +|config|**Configuration file generated when exporting the model**, instead of the configuration file in the configs directory|Yes|-| +|image_path|The path or directory of the test image.|Yes|-| +|use_trt|Whether to enable TensorRT to accelerate prediction.|No|No| +|use_int8|Whether to run in int8 mode when starting TensorRT prediction.|No|No| +|batch_size|Batch sizein single card.|No|The value specified in the configuration file.| +|save_dir|The directory of prediction results.|No|output| +|with_argmax|Perform argmax operation on the prediction results.|No|No| + +## **9 Custom Software Development** + +- After trying to complete the training with the configuration file, there must be some friends who want to develop more in-depth development based on PaddleSeg. Here, we will briefly introduce the code structure of PaddleSeg. + +``` +PaddleSeg + ├── configs # Configuration file folder + ├── paddleseg # core code for training deployment + ├── core # Start model training, evaluation and prediction interface + ├── cvlibs # The Config class is defined in this folder. It saves all hyperparameters such as dataset, model configuration, backbone network, loss function, etc. + ├── callbacks.py + └── ... + ├── datasets # PaddleSeg supported data formats, including ade, citycapes and other formats + ├── ade.py + ├── citycapes.py + └── ... + ├── models # This folder contains the various parts of the PaddleSeg network + ├── backbone # The backbone network used by paddleseg + ├── hrnet.py + ├── resnet_vd.py + └── ... + ├── layers # Some components, such as the attention mechanism + ├── activation.py + ├── attention.py + └── ... + ├── losses # This folder contains the loss function used by PaddleSeg + ├── dice_loss.py + ├── lovasz_loss.py + └── ... + ├── ann.py # This file represents the algorithm model supported by PaddleSeg, here represents the ann algorithm. 
+ ├── deeplab.py #This file represents the algorithm model supported by PaddleSeg, here it represents the Deeplab algorithm. + ├── unet.py #This file represents the algorithm model supported by PaddleSeg, here it represents the unet algorithm. + └── ... + ├── transforms # Data preprocessing operations, including various data augmentation strategies + ├── functional.py + └── transforms.py + └── utils + ├── config_check.py + ├── visualize.py + └── ... + ├── train.py # The training entry file, which describes the analysis of parameters, the starting method of training, and the resources prepared for training. + ├── predict.py # Prediction file + └── ... + + +``` + +- You can also try to use PaddleSeg's API to develop themselves. After installing PaddleSeg using the pip install command, developers can easily implement the training, evaluation and inference of the image segmentation model with just a few lines of code. Interested friends can visit [PaddleSeg dynamic graph API usage tutorial](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0) + +PaddleSeg and other development kits in various fields have provided top-level solutions for real industrial practice. Some domestic teams have used PaddleSeg's development kits to achieve good results in international competitions. It can be seen that the effects provided by the development kits are State Of The Art. diff --git a/docs/quick_start.md b/docs/whole_process_cn.md similarity index 84% rename from docs/quick_start.md rename to docs/whole_process_cn.md index f1133a2c6d..71ce258f26 100644 --- a/docs/quick_start.md +++ b/docs/whole_process_cn.md @@ -1,26 +1,27 @@ +简体中文 | [English](whole_process.md) # PaddleSeg全流程跑通 -以BiSeNetV2和医学视盘分割数据集为例介绍PaddleSeg的**配置化驱动**使用方式。如果想了解API调用的使用方法,可点击[PaddleSeg高级教程](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0)。 +我们将以`BiSeNetV2`和`医学视盘分割数据集`为例介绍PaddleSeg的**配置化驱动**使用方式。如果想了解API调用的使用方法,可点击[PaddleSeg高级教程](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0)。 -按以下几个步骤来介绍使用流程。 +PaddleSeg的使用流程如下: 1. 准备环境:使用PaddleSeg的软件环境 -2. 数据说明:用户如何自定义数据集 +2. 准备数据:用户如何准备、整理自定义数据集 3. 模型训练:训练配置和启动训练命令 4. 可视化训练过程:使用VDL展示训练过程 5. 模型评估:评估模型效果 -6. 效果可视化:使用训练好的模型进行预测,同时对结果进行可视化 +6. 模型预测与可视化:使用训练好的模型进行预测,同时对结果进行可视化 7. 模型导出:如何导出可进行部署的模型 8. 模型部署:快速使用Python实现高效部署 -**1. 环境安装与验证** +## **1. 环境安装与验证** -**1.1环境安装** +### **1.1 环境安装** 在使用PaddleSeg训练图像分割模型之前,用户需要完成如下任务: 1. 安装[Python3.6或更高版本](https://www.python.org/downloads/)。 -2. 安装飞桨2.0或更高版本,具体安装方法请参见[快速安装](https://www.paddlepaddle.org.cn/install/quick)。由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用PaddleSeg。 +2. 安装`PaddlePaddle 2.1`版本,具体安装方法请参见[快速安装](https://www.paddlepaddle.org.cn/install/quick)。由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用PaddleSeg。 3. 下载PaddleSeg的代码库。 ``` @@ -35,11 +36,11 @@ git clone https://gitee.com/paddlepaddle/PaddleSeg.git pip install paddleseg ``` -**1.2确认环境安装成功** +### **1.2 确认环境安装成功** 下述命令均在PaddleSeg目录下完成 -执行下面命令,并在PaddleSeg/output文件夹中出现预测结果,则证明安装成功 +执行下面命令,如果在PaddleSeg/output文件夹中出现预测结果,则证明安装成功 ``` python predict.py \ @@ -49,13 +50,13 @@ python predict.py \ --save_dir output/result ``` -**2. 数据集下载与说明** +## **2. 数据集下载与说明** **数据集下载** 本章节将使用视盘分割(optic disc segmentation)数据集进行训练,视盘分割是一组眼底医疗分割数据集,包含了267张训练图片、76张验证图片、38张测试图片。通过以下命令可以下载该数据集。 -数据集的原图和效果图如下所示,任务是将眼球图片中的视盘区域分割出来。 +数据集的原图和效果图如下所示,我们的任务将是将眼球图片中的视盘区域分割出来。 ![](./images/fig1.png) @@ -72,14 +73,32 @@ unzip optic_disc_seg.zip cd .. 
``` -**2.1数据集说明** +## **2.1 准备数据集** -如何使用自己的数据集进行训练是开发者最关心的事情,下面我们将着重说明一下如果要自定义数据集,我们该准备成什么样子?数据集准备好,如何在配置文件中进行改动。 +如何使用自己的数据集进行训练是开发者最关心的事情,下面我们将着重说明一下如果要自定义数据集,我们该准备成什么样子?以及数据集准备好后,如何在配置文件中作出相应改动。 -**2.1.1数据集说明** +### **2.1.1 整理数据集** - 推荐整理成如下结构 + custom_dataset + | + |--images + | |--image1.jpg + | |--image2.jpg + | |--... + | + |--labels + | |--label1.png + | |--label2.png + | |--... + | + |--train.txt + | + |--val.txt + | + |--test.txt + - 文件夹命名为custom_dataset、images、labels不是必须,用户可以自主进行命名。 - train.txt val.txt test.txt中文件并非要和custom_dataset文件夹在同一目录下,可以通过配置文件中的选项修改. @@ -93,15 +112,15 @@ cd .. ... ``` -我们刚刚下载的数据集格式也与之类似(label.txt可有可以无),如果用户要进行数据集标注和数据划分,请参考文档。 +我们刚刚下载的数据集格式也与之类似(label.txt可有可以无),如果用户要进行数据集标注和数据划分,请参考[数据标注文档](data/marker/marker_cn.md)与[数据集划分文档](data/custom/data_prepare_cn.md)。 我们一般推荐用户将数据集放置在PaddleSeg下的dataset文件夹下。 -**模型训练** +## **3. 模型训练** - 在这里选择BiseNetV2模型,BiseNetV2是一个轻量化模型,在Cityscapes测试集中的平均IoU达到72.6%,在一张NVIDIA GeForce GTX 1080 Ti卡上的速度为156 FPS,这比现有方法要快得多,而且可以实现更好的分割精度。 -**3.1 BiseNetV2模型介绍** +### **3.1 BiseNetV2模型介绍** 双边分割网络(BiSeNet V2),将低层次的网络细节和高层次的语义分类分开处理,以实现高精度和高效率的实时语义分割。它在速度和精度之间进行权衡。该体系结构包括: @@ -113,7 +132,7 @@ cd .. ​ 图2:数据集的原图和效果图 -**3.2 配置文件详细解读** +### **3.2 配置文件详细解读** 在了解完BiseNetV2原理后,我们便可准备进行训练了。上文中我们谈到PaddleSeg提供了**配置化驱动**进行模型训练。那么在训练之前,先来了解一下配置文件,在这里我们以`bisenet_optic_disc_512x512_1k.yml`为例子说明,该yaml格式配置文件包括模型类型、骨干网络、训练和测试、预训练数据集和配套工具(如数据增强)等信息。 @@ -134,7 +153,7 @@ train_dataset: #训练数据设置 num_classes: 2 #指定目标的类别个数(背景也算为一类) transforms: #数据预处理/增强的方式 - type: Resize #送入网络之前需要进行resize - target_size: [512, 512] #将原图resize成512*512在送入网络 + target_size: [512, 512] #将原图resize成512*512再送入网络 - type: RandomHorizontalFlip #采用水平反转的方式进行数据增强 - type: Normalize #图像进行归一化 mode: train @@ -154,12 +173,11 @@ optimizer: #设定优化器的类型 momentum: 0.9 #动量 weight_decay: 4.0e-5 #权值衰减,使用的目的是防止过拟合 -learning_rate: #设定学习率 - value: 0.01 #初始学习率 - decay: - type: poly #采用poly作为学习率衰减方式。 - power: 0.9 #衰减率 - end_lr: 0 #最终学习率 +lr_scheduler: # 学习率的相关设置 + type: PolynomialDecay # 一种学习率类型。共支持12种策略 + learning_rate: 0.01 + power: 0.9 + end_lr: 0 loss: #设定损失函数的类型 types: @@ -177,7 +195,7 @@ Q:有的读者可能会有疑问,什么样的配置项是设计在配置文 A:与模型方案相关的信息均在配置文件中,还包括对原始样本的数据增强策略等。除了iters、batch_size、learning_rate3种常见参数外,命令行参数仅涉及对训练过程的配置。也就是说,配置文件最终决定了使用什么模型。 -**3.3 修改配置文件中对应的数据配置** +### **3.3 修改配置文件中对应的数据配置** 当用户准备好数据集后,可以在配置文件中指定位置修改数据路径来进行进一步的训练 @@ -216,7 +234,7 @@ val_dataset: mode: val ``` -**3.4 正式开启训练** +### **3.4 正式开启训练** 当我们修改好对应的配置参数后,就可以上手体验使用了 @@ -247,7 +265,7 @@ output └── model.pdparams ``` -**3.5 训练参数解释** +### **3.5 训练参数解释** | 参数名 | 用途 | 是否必选项 | 默认值 | | :------------------ | :----------------------------------------------------------- | :--------- | :--------------- | @@ -258,13 +276,13 @@ output | save_dir | 模型和visualdl日志文件的保存根路径 | 否 | output | | num_workers | 用于异步读取数据的进程数量, 大于等于1时开启子进程读取数据 | 否 | 0 | | use_vdl | 是否开启visualdl记录训练数据 | 否 | 否 | -| save_interval_iters | 模型保存的间隔步数 | 否 | 1000 | +| save_interval | 模型保存的间隔步数 | 否 | 1000 | | do_eval | 是否在保存模型时启动评估, 启动时将会根据mIoU保存最佳模型至best_model | 否 | 否 | | log_iters | 打印日志的间隔步数 | 否 | 10 | | resume_model | 恢复训练模型路径,如:`output/iter_1000` | 否 | None | | keep_checkpoint_max | 最新模型保存个数 | 否 | 5 | -**3.6 配置文件的深度探索** +### **3.6 配置文件的深度探索** - 刚刚我们拿出一个BiSeNetV2的配置文件让大家去体验一下如何数据集配置,在这里例子中,所有的参数都放置在了一个yml文件中,但是实际PaddleSeg的配置文件为了具有更好的复用性和兼容性,采用了更加耦合的设计,即一个模型需要两个以上配置文件来实现,下面我们具体一DeeplabV3p为例子来为大家说明配置文件的耦合设置。 - 例如我们要更改deeplabv3p_resnet50_os8_cityscapes_1024x512_80k.yml 文件的配置,则会发现该文件还依赖(base)cityscapes.yml文件。此时,我们就需要同步打开 cityscapes.yml 文件进行相应参数的设置。 @@ -281,11 +299,11 @@ output 
Q:有些共同的参数,多个配置文件下都有,那么我以哪一个为准呢? -A:如图中序号所示,1号yml文件的参数可以覆盖2号yml文件的参数,即1号的配置文件优于2号文件 +A:如图中序号所示,1号yml文件的参数可以覆盖2号yml文件的参数,即1号的配置文件优于2号文件。另外,如果在命令行中指定了yaml文件中出现的参数,则命令行的配置优于yaml文件。(如:根据你的机器配置在命令行中调整batch_size,无需修改configs中预设的yaml文件) -**3.7 多卡训练** +### **3.7 多卡训练** -**注意**:如果想要使用多卡训练的话,需要将环境变量CUDA_VISIBLE_DEVICES指定为多卡(不指定时默认使用所有的gpu),并使用paddle.distributed.launch启动训练脚本(windows下由于不支持nccl,无法使用多卡训练): +**注意**:如果想要使用多卡训练的话,需要将环境变量`CUDA_VISIBLE_DEVICES`指定为多卡(不指定时默认使用所有的gpu),并使用`paddle.distributed.launch`启动训练脚本(由于windows下不支持nccl,无法使用多卡训练): ``` export CUDA_VISIBLE_DEVICES=0,1,2,3 # 设置4张可用的卡 @@ -297,7 +315,7 @@ python -m paddle.distributed.launch train.py \ --save_dir output ``` -**3.8 恢复训练** +### **3.8 恢复训练** ``` python train.py \ @@ -309,7 +327,7 @@ python train.py \ --save_dir output ``` -**4. 训练过程可视化** +## **4. 训练过程可视化** - 为了更直观我们的网络训练过程,对网络进行分析从而更快速的得到更好的网络,飞桨提供了可视化分析工具:VisualDL @@ -335,7 +353,7 @@ visualdl --logdir output/ ​ 图4:VDL效果演示 -**5. 模型评估** +## **5. 模型评估** 训练完成后,用户可以使用评估脚本val.py来评估模型效果。假设训练过程中迭代次数(iters)为1000,保存模型的间隔为500,即每迭代1000次数据集保存2次训练模型。因此一共会产生2个定期保存的模型,加上保存的最佳模型best_model,一共有3个模型,可以通过model_path指定期望评估的模型文件。 @@ -367,13 +385,13 @@ python val.py \ --stride 128 128 ``` -在图像分割领域中,评估模型质量主要是通过三个指标进行判断,准确率(acc)、平均交并比(Mean Intersection over Union,简称mIoU)、Kappa系数。 +在图像分割领域中,评估模型质量主要是通过三个指标进行判断,`准确率`(acc)、`平均交并比`(Mean Intersection over Union,简称mIoU)、`Kappa系数`。 -- 准确率:指类别预测正确的像素占总像素的比例,准确率越高模型质量越好。 -- 平均交并比:对每个类别数据集单独进行推理计算,计算出的预测区域和实际区域交集除以预测区域和实际区域的并集,然后将所有类别得到的结果取平均。在本例中,正常情况下模型在验证集上的mIoU指标值会达到0.80以上,显示信息示例如下所示,第3行的**mIoU=0.8526**即为mIoU。 -- Kappa系数:一个用于一致性检验的指标,可以用于衡量分类的效果。kappa系数的计算是基于混淆矩阵的,取值为-1到1之间,通常大于0。其公式如下所示,P0P_0*P*0为分类器的准确率,PeP_e*P**e*为随机分类器的准确率。Kappa系数越高模型质量越好。 +- **准确率**:指类别预测正确的像素占总像素的比例,准确率越高模型质量越好。 +- **平均交并比**:对每个类别数据集单独进行推理计算,计算出的预测区域和实际区域交集除以预测区域和实际区域的并集,然后将所有类别得到的结果取平均。在本例中,正常情况下模型在验证集上的mIoU指标值会达到0.80以上,显示信息示例如下所示,第3行的**mIoU=0.8526**即为mIoU。 +- **Kappa系数**:一个用于一致性检验的指标,可以用于衡量分类的效果。kappa系数的计算是基于混淆矩阵的,取值为-1到1之间,通常大于0。其公式如下所示,P0P_0*P*0为分类器的准确率,PeP_e*P**e*为随机分类器的准确率。Kappa系数越高模型质量越好。 -Kappa=P0−Pe1−PeKappa= \frac{P_0-P_e}{1-P_e}*K**a**p**p**a*=1−*P**e**P*0−*P**e* + 随着评估脚本的运行,最终打印的评估日志如下。 @@ -388,7 +406,7 @@ Kappa=P0−Pe1−PeKappa= \frac{P_0-P_e}{1-P_e}*K**a**p**p**a*=1−*P**e**P*0− [0.9959 0.8886] ``` -**6. 效果可视化** +## **6. 预测与效果可视化** 除了分析模型的IOU、ACC和Kappa指标之外,我们还可以查阅一些具体样本的切割样本效果,从Bad Case启发进一步优化的思路。 @@ -414,7 +432,7 @@ python predict.py \ ​ 图5:预测效果展示 -**7 模型导出** +## **7 模型导出** 为了方便用户进行工业级的部署,PaddleSeg提供了一键动转静的功能,即将训练出来的动态图模型文件转化成静态图形式。 ``` @@ -441,7 +459,7 @@ output └── model.pdmodel # 静态图模型文件 ``` -**8 应用部署** +## **8 应用部署** - PaddleSeg目前支持以下部署方式: @@ -473,7 +491,7 @@ python deploy/python/infer.py \ | save_dir | 保存预测结果的目录 | 否 | output | | with_argmax| 对预测结果进行argmax操作 | 否 | 否 | -**9 二次开发** +## **9 二次开发** - 在尝试完成使用配置文件进行训练之后,肯定有小伙伴想基于PaddleSeg进行更深入的开发,在这里,我们大概介绍一下PaddleSeg代码结构, @@ -481,8 +499,8 @@ python deploy/python/infer.py \ PaddleSeg ├── configs #配置文件文件夹 ├── paddleseg #训练部署的核心代码 - ├── core - ├── cvlibs # Config类定义在该文件夹中。它保存了数据集、模型配置、主干网络、损失函数等所有的超参数。 + ├── core # 启动模型训练,评估与预测的接口 + ├── cvlibs # Config类定义在该文件夹中。它保存了数据集、模型配置、主干网络、损失函数等所有的超参数。 ├── callbacks.py └── ... 
├── datasets #PaddleSeg支持的数据格式,包括ade、citycapes等多种格式 diff --git a/legacy/README.md b/legacy/README.md deleted file mode 100644 index 963c23fc3a..0000000000 --- a/legacy/README.md +++ /dev/null @@ -1,115 +0,0 @@ -English | [简体中文](README_CN.md) - -# PaddleSeg - -[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleSeg.svg?branch=master)](https://travis-ci.org/PaddlePaddle/PaddleSeg) -[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) -[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases) -![python version](https://img.shields.io/badge/python-3.6+-orange.svg) -![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) - - *[2020-12-02] PaddleSeg has released the [dynamic graph](./dygraph) version, which supports PaddlePaddle 2.0rc. For the static graph, we only fix bugs without adding new features. See detailed [release notes](./docs/release_notes.md).* - -## Introduction - -PaddleSeg is an end-to-end image segmentation development kit based on PaddlePaddle, which aims to help developers in the whole process of training models, optimizing performance and inference speed, and deploying models. Currently PaddleSeg supports seven efficient segmentation models, including DeepLabv3+, U-Net, ICNet, PSPNet, HRNet, Fast-SCNN, and OCRNet, which are extensively used in both academia and industry. Enjoy your Seg journey! - -![demo](./docs/imgs/cityscapes.png) - -## Main Features - -- **Practical Data Augmentation Techniques** - -PaddleSeg provides 10+ data augmentation techniques, which are developed from the product-level applications in Baidu. The techniques are able to help developers improve the generalization and robustness ability of their customized models. - -- **Modular Design** - -PaddleSeg supports seven popular segmentation models, including U-Net, DeepLabv3+, ICNet, PSPNet, HRNet, Fast-SCNN, and OCRNet. Combing with different components, such as pre-trained models, adjustable backbone architectures and loss functions, developer can easily build an efficient segmentation model according to their practical performance requirements. - -- **High Performance** - -PaddleSeg supports the efficient acceleration strategies, such as multi-processing I/O operations, and multi-GPUs parallel training. Moreover, integrating GPU memory optimization techniques in the PaddlePaddle framework, PaddleSeg significantly reduces training overhead of the segmentation models, which helps developers complete the segmentation tasks in a high-efficient way. - -- **Industry-Level Deployment** - -PaddleSeg supports the industry-level deployment in both **server** and **mobile devices** with the high-performance inference engine and image processing ability, which helps developers achieve the high-performance deployment and integration of segmentation model efficiently. Particularly, using another paddle tool [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite), the segmentation models trained in PaddleSeg are able to be deployed on mobile/embedded devices quickly and easily. - -- **Rich Practical Cases** - -PaddleSeg provides rich practical cases in industry, such as human segmentation, mechanical meter segmentation, lane segmentation, remote sensing image segmentation, human parsing, and industry inspection, etc. The practical cases allow developers to get a closer look at the image segmentation area, and get more hand-on experiences on the real practice. 
- -## Installation - -### 1. Install PaddlePaddle - -System Requirements: -* PaddlePaddle >= 1.7.0 and < 2.0 -* Python >= 3.5+ - -> Note: the above requirements are for the **static** graph version. If you intent to use the dynamic one, please refers to [here](./dygraph). - -Highly recommend you install the GPU version of PaddlePaddle, due to large overhead of segmentation models, otherwise it could be out of memory while running the models. - -For more detailed installation tutorials, please refer to the official website of [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick)。 - -### 2. Download PaddleSeg - -``` -git clone https://github.com/PaddlePaddle/PaddleSeg -``` - -### 3. Install Dependencies -Install the python dependencies via the following commands,and please make sure execute it at least once in your branch. -``` -cd PaddleSeg -pip install -r requirements.txt -``` - -## Tutorials - -For a better understanding of PaddleSeg, we provide comprehensive tutorials to show the whole process of using PaddleSeg on model training, evaluation and deployment. Besides the basic usages of PaddleSeg, the design insights will be also mentioned in the tutorials. - -### Quick Start - -* [PaddleSeg Start](./docs/usage.md) - -### Basic Usages - -* [Customized Data Preparation](./docs/data_prepare.md) -* [Scripts and Config Guide](./docs/config.md) -* [Data and Config Verification](./docs/check.md) -* [Segmentation Models](./docs/models.md) -* [Pretrained Models](./docs/model_zoo.md) -* [DeepLabv3+ Tutorial](./tutorial/finetune_deeplabv3plus.md) - -### Inference and Deployment - -* [Model Export](./docs/model_export.md) -* [Python Inference](./deploy/python/) -* [C++ Inference](./deploy/cpp/) -* [Paddle-Lite Mobile Inference & Deployment](./deploy/lite/) -* [PaddleServing Inference & Deployment](./deploy/paddle-serving) - - -### Advanced features - -* [Data Augmentation](./docs/data_aug.md) -* [Loss Functions](./docs/loss_select.md) -* [Practical Cases](./contrib) -* [Multiprocessing and Mixed-Precision Training](./docs/multiple_gpus_train_and_mixed_precision_train.md) -* Model Compression ([Quantization](./slim/quantization/README.md), [Distillation](./slim/distillation/README.md), [Pruning](./slim/prune/README.md), [NAS](./slim/nas/README.md)) - - -### Online Tutorials - -We further provide a few online tutorials in Baidu AI Studio:[Get Started](https://aistudio.baidu.com/aistudio/projectdetail/100798), [U-Net](https://aistudio.baidu.com/aistudio/projectDetail/102889), [DeepLabv3+](https://aistudio.baidu.com/aistudio/projectDetail/226703), [Industry Inspection](https://aistudio.baidu.com/aistudio/projectdetail/184392), [HumanSeg](https://aistudio.baidu.com/aistudio/projectdetail/475345), [More](https://aistudio.baidu.com/aistudio/projectdetail/226710). - - -## Feedbacks and Contact -* If your question is not answered properly in [FAQ](./docs/faq.md) or you have an idea on PaddleSeg, please report an issue via [Github Issues](https://github.com/PaddlePaddle/PaddleSeg/issues). -* PaddleSeg User Group (QQ): 850378321 or 793114768 - - -## Contributing - -All contributions and suggestions are welcomed. If you want to contribute to PaddleSeg,please summit an issue or create a pull request directly. 
diff --git a/legacy/README_CN.md b/legacy/README_CN.md deleted file mode 100644 index c953b3cca0..0000000000 --- a/legacy/README_CN.md +++ /dev/null @@ -1,230 +0,0 @@ -简体中文 | [English](README.md) - -# PaddleSeg - -[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleSeg.svg?branch=master)](https://travis-ci.org/PaddlePaddle/PaddleSeg) -[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) -[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases) -![python version](https://img.shields.io/badge/python-3.6+-orange.svg) -![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) - - *[2020-12-02] PaddleSeg已经发布了全新的[动态图版本](./dygraph),全面适配 PaddlePaddle 2.0rc, 静态图版本只作维护不再添加新功能,更多信息请查看详细[更新日志](./docs/release_notes.md).* - -## 简介 - -PaddleSeg是基于[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的端到端图像分割开发套件,覆盖了DeepLabv3+, U-Net, ICNet, PSPNet, HRNet, Fast-SCNN等主流分割网络。通过模块化的设计,以配置化方式驱动模型组合,帮助开发者更便捷地完成从训练到部署的全流程图像分割应用。 - -- [特点](#特点) -- [安装](#安装) -- [使用教程](#使用教程) - - [快速入门](#快速入门) - - [基础功能](#基础功能) - - [预测部署](#预测部署) - - [高级功能](#高级功能) -- [在线体验](#在线体验) -- [FAQ](#FAQ) -- [交流与反馈](#交流与反馈) -- [更新日志](#更新日志) -- [贡献代码](#贡献代码) - -## 特点 - -- **丰富的数据增强** - -基于百度视觉技术部的实际业务经验,内置10+种数据增强策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 - -- **模块化设计** - -支持U-Net, DeepLabv3+, ICNet, PSPNet, HRNet, Fast-SCNN六种主流分割网络,结合预训练模型和可调节的骨干网络,满足不同性能和精度的要求;选择不同的损失函数如Dice Loss, Lovasz Loss等方式可以强化小目标和不均衡样本场景下的分割精度。 - -- **高性能** - -PaddleSeg支持多进程I/O、多卡并行等训练加速策略,结合飞桨核心框架的显存优化功能,可大幅度减少分割模型的显存开销,让开发者更低成本、更高效地完成图像分割训练。 - -- **工业级部署** - -全面提供**服务端**和**移动端**的工业级部署能力,依托飞桨高性能推理引擎和高性能图像处理实现,开发者可以轻松完成高性能的分割模型部署和集成。通过[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite),可以在移动设备或者嵌入式设备上完成轻量级、高性能的人像分割模型部署。 - -- **产业实践案例** - -PaddleSeg提供丰富地产业实践案例,如[人像分割](./contrib/HumanSeg)、[工业表计检测](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib#%E5%B7%A5%E4%B8%9A%E8%A1%A8%E7%9B%98%E5%88%86%E5%89%B2)、[遥感分割](./contrib/RemoteSensing)、[人体解析](contrib/ACE2P),[工业质检](https://aistudio.baidu.com/aistudio/projectdetail/184392)等产业实践案例,助力开发者更便捷地落地图像分割技术。 - -## 安装 - -### 1. 安装PaddlePaddle - -版本要求 -* PaddlePaddle >= 1.7.0 and < 2.0 -* Python >= 3.5+ - -由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用PaddleSeg. - -安装教程请见[PaddlePaddle官网](https://www.paddlepaddle.org.cn/install/quick)。 - -### 2. 下载PaddleSeg代码 - -``` -git clone https://github.com/PaddlePaddle/PaddleSeg -``` - -### 3. 
安装PaddleSeg依赖 -通过以下命令安装python包依赖,请确保在该分支上至少执行过一次以下命令: -``` -cd PaddleSeg -pip install -r requirements.txt -``` - -## 使用教程 - -我们提供了一系列的使用教程,来说明如何使用PaddleSeg完成语义分割模型的训练、评估、部署。 - -这一系列的文档被分为**快速入门**、**基础功能**、**预测部署**、**高级功能**四个部分,四个教程由浅至深地介绍PaddleSeg的设计思路和使用方法。 - -### 快速入门 - -* [PaddleSeg快速入门](./docs/usage.md) - -### 基础功能 - -* [自定义数据的标注与准备](./docs/data_prepare.md) -* [脚本使用和配置说明](./docs/config.md) -* [数据和配置校验](./docs/check.md) -* [分割模型介绍](./docs/models.md) -* [预训练模型下载](./docs/model_zoo.md) -* [DeepLabv3+模型使用教程](./tutorial/finetune_deeplabv3plus.md) -* [U-Net模型使用教程](./tutorial/finetune_unet.md) -* [ICNet模型使用教程](./tutorial/finetune_icnet.md) -* [PSPNet模型使用教程](./tutorial/finetune_pspnet.md) -* [HRNet模型使用教程](./tutorial/finetune_hrnet.md) -* [Fast-SCNN模型使用教程](./tutorial/finetune_fast_scnn.md) -* [OCRNet模型使用教程](./tutorial/finetune_ocrnet.md) - -### 预测部署 - -* [模型导出](./docs/model_export.md) -* [Python预测](./deploy/python/) -* [C++预测](./deploy/cpp/) -* [Paddle-Lite移动端预测部署](./deploy/lite/) -* [PaddleServing预测部署](./deploy/paddle-serving) - - -### 高级功能 - -* [PaddleSeg的数据增强](./docs/data_aug.md) -* [PaddleSeg的loss选择](./docs/loss_select.md) -* [PaddleSeg产业实践](./contrib) -* [多进程训练和混合精度训练](./docs/multiple_gpus_train_and_mixed_precision_train.md) -* 使用PaddleSlim进行分割模型压缩([量化](./slim/quantization/README.md), [蒸馏](./slim/distillation/README.md), [剪枝](./slim/prune/README.md), [搜索](./slim/nas/README.md)) -## 在线体验 - -我们在AI Studio平台上提供了在线体验的教程,欢迎体验: - -|在线教程|链接| -|-|-| -|快速开始|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/100798)| -|U-Net图像分割|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/102889)| -|DeepLabv3+图像分割|[点击体验](https://aistudio.baidu.com/aistudio/projectDetail/226703)| -|工业质检(零件瑕疵检测)|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/184392)| -|人像分割|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/475345)| -|PaddleSeg特色垂类模型|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/226710)| - -## FAQ - -#### Q: 安装requirements.txt指定的依赖包时,部分包提示找不到? - -A: 可能是pip源的问题,这种情况下建议切换为官方源,或者通过`pip install -r requirements.txt -i `指定其他源地址。 - -#### Q:图像分割的数据增强如何配置,Unpadding, StepScaling, RangeScaling的原理是什么? - -A: 更详细数据增强文档可以参考[数据增强](./docs/data_aug.md) - -#### Q: 训练时因为某些原因中断了,如何恢复训练? - -A: 启动训练脚本时通过命令行覆盖TRAIN.RESUME_MODEL_DIR配置为模型checkpoint目录即可, 以下代码示例第100轮重新恢复训练: -``` -python pdseg/train.py --cfg xxx.yaml TRAIN.RESUME_MODEL_DIR /PATH/TO/MODEL_CKPT/100 -``` - -#### Q: 预测时图片过大,导致显存不足如何处理? - -A: 降低Batch size,使用Group Norm策略;请注意训练过程中当`DEFAULT_NORM_TYPE`选择`bn`时,为了Batch Norm计算稳定性,batch size需要满足>=2 - - -## 交流与反馈 -* 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/PaddleSeg/issues)来提交问题、报告与建议 -* 微信公众号:飞桨PaddlePaddle -* QQ群: 703252161 - -

     

-    微信公众号                官方技术交流QQ群
- -## 更新日志 - -* 2020.10.28 - - **`v0.7.0`** - * 全面支持Paddle2.0-rc动态图模式,推出PaddleSeg[动态图体验版](./dygraph/) - * 发布大量动态图模型,支持11个分割模型,4个骨干网络,3个数据集: - * 分割模型:ANN, BiSeNetV2, DANet, DeeplabV3, DeeplabV3+, FCN, FastSCNN, GCNet, OCRNet, PSPNet, UNet - * 骨干网络:ResNet, HRNet, MobileNetV3, Xception - * 数据集:Cityscapes, ADE20K, Pascal VOC - - * 提供高精度骨干网络预训练模型以及基于Cityscapes数据集的语义分割[预训练模型](./dygraph/configs/)。Cityscapes精度超过**82%**。 - - -* 2020.08.31 - - **`v0.6.0`** - * 丰富Deeplabv3p网络结构,新增ResNet-vd、MobileNetv3两种backbone,满足高性能与高精度场景,并提供基于Cityscapes和ImageNet的[预训练模型](./docs/model_zoo.md)4个。 - * 新增高精度分割模型OCRNet,支持以HRNet作为backbone,提供基于Cityscapes的[预训练模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_zoo.md#cityscapes%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B),mIoU超过80%。 - * 新增proposal free的实例分割模型[Spatial Embedding](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/SpatialEmbeddings),性能与精度均超越MaskRCNN。提供了基于kitti的预训练模型。 - -* 2020.05.12 - - **`v0.5.0`** - * 全面升级[HumanSeg人像分割模型](./contrib/HumanSeg),新增超轻量级人像分割模型HumanSeg-lite支持移动端实时人像分割处理,并提供基于光流的视频分割后处理提升分割流畅性。 - * 新增[气象遥感分割方案](./contrib/RemoteSensing),支持积雪识别、云检测等气象遥感场景。 - * 新增[Lovasz Loss](docs/lovasz_loss.md),解决数据类别不均衡问题。 - * 使用VisualDL 2.0作为训练可视化工具 - -* 2020.02.25 - - **`v0.4.0`** - * 新增适用于实时场景且不需要预训练模型的分割网络Fast-SCNN,提供基于Cityscapes的[预训练模型](./docs/model_zoo.md)1个 - * 新增LaneNet车道线检测网络,提供[预训练模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/v0.4.0/contrib/LaneNet#%E4%B8%83-%E5%8F%AF%E8%A7%86%E5%8C%96)一个 - * 新增基于PaddleSlim的分割库压缩策略([量化](./slim/quantization/README.md), [蒸馏](./slim/distillation/README.md), [剪枝](./slim/prune/README.md), [搜索](./slim/nas/README.md)) - - -* 2019.12.15 - - **`v0.3.0`** - * 新增HRNet分割网络,提供基于cityscapes和ImageNet的[预训练模型](./docs/model_zoo.md)8个 - * 支持使用[伪彩色标签](./docs/data_prepare.md#%E7%81%B0%E5%BA%A6%E6%A0%87%E6%B3%A8vs%E4%BC%AA%E5%BD%A9%E8%89%B2%E6%A0%87%E6%B3%A8)进行训练/评估/预测,提升训练体验,并提供将灰度标注图转为伪彩色标注图的脚本 - * 新增[学习率warmup](./docs/configs/solver_group.md#lr_warmup)功能,支持与不同的学习率Decay策略配合使用 - * 新增图像归一化操作的GPU化实现,进一步提升预测速度。 - * 新增Python部署方案,更低成本完成工业级部署。 - * 新增Paddle-Lite移动端部署方案,支持人像分割模型的移动端部署。 - * 新增不同分割模型的预测[性能数据Benchmark](./deploy/python/docs/PaddleSeg_Infer_Benchmark.md), 便于开发者提供模型选型性能参考。 - - -* 2019.11.04 - - **`v0.2.0`** - * 新增PSPNet分割网络,提供基于COCO和cityscapes数据集的[预训练模型](./docs/model_zoo.md)4个。 - * 新增Dice Loss、BCE Loss以及组合Loss配置,支持样本不均衡场景下的[模型优化](./docs/loss_select.md)。 - * 支持[FP16混合精度训练](./docs/multiple_gpus_train_and_mixed_precision_train.md)以及动态Loss Scaling,在不损耗精度的情况下,训练速度提升30%+。 - * 支持[PaddlePaddle多卡多进程训练](./docs/multiple_gpus_train_and_mixed_precision_train.md),多卡训练时训练速度提升15%+。 - * 发布基于UNet的[工业标记表盘分割模型](./contrib#%E5%B7%A5%E4%B8%9A%E7%94%A8%E8%A1%A8%E5%88%86%E5%89%B2)。 - -* 2019.09.10 - - **`v0.1.0`** - * PaddleSeg分割库初始版本发布,包含DeepLabv3+, U-Net, ICNet三类分割模型, 其中DeepLabv3+支持Xception, MobileNet v2两种可调节的骨干网络。 - * CVPR19 LIP人体部件分割比赛冠军预测模型发布[ACE2P](./contrib/ACE2P)。 - * 预置基于DeepLabv3+网络的[人像分割](./contrib/HumanSeg/)和[车道线分割](./contrib/RoadLine)预测模型发布。 - -
- -## 贡献代码 - -我们非常欢迎您为PaddleSeg贡献代码或者提供使用建议。如果您可以修复某个issue或者增加一个新功能,欢迎给我们提交Pull Requests. diff --git a/legacy/configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml b/legacy/configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml deleted file mode 100644 index e4a3d7f59a..0000000000 --- a/legacy/configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml +++ /dev/null @@ -1,51 +0,0 @@ -EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True - TO_RGB: True -BATCH_SIZE: 8 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True - BACKBONE: "resnet_vd_50" - OUTPUT_STRIDE: 8 - BIAS: null - ALIGN_CORNERS: False - BENCHMARK: True - DECODER: - ACT: False -TRAIN: - PRETRAINED_MODEL_DIR: u"pretrained_model/resnet50_vd_imagenet" - MODEL_SAVE_DIR: "output/deeplabv3p_resnet50_vd_bn_cityscapes" - SNAPSHOT_EPOCH: 10 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "output/deeplabv3p_resnet50_vd_bn_cityscapes/final" -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 215 diff --git a/legacy/configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml b/legacy/configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml deleted file mode 100644 index cc0e69ae6a..0000000000 --- a/legacy/configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml +++ /dev/null @@ -1,53 +0,0 @@ -EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: -# AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (1024, 512) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 8 - -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "./dataset/cityscapes/train.list" - VAL_FILE_LIST: "./dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " - -MODEL: - MODEL_NAME: "hrnet" - DEFAULT_NORM_TYPE: "bn" - HRNET: - STAGE2: - NUM_CHANNELS: [18, 36] - STAGE3: - NUM_CHANNELS: [18, 36, 72] - STAGE4: - NUM_CHANNELS: [18, 36, 72, 144] - BIAS: False - ALIGN_CORNERS: False - -TRAIN: - PRETRAINED_MODEL_DIR: u"./pretrained_model/hrnet_w18_ssld" - MODEL_SAVE_DIR: 
"output/hrnetw18_bn_cityscapes" - SNAPSHOT_EPOCH: 10 - SYNC_BATCH_NORM: True - -TEST: - TEST_MODEL: "output/hrnetw18_bn_cityscapes/best_model" - -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - WEIGHT_DECAY: 5.0e-4 - OPTIMIZER: "sgd" - NUM_EPOCHS: 215 diff --git a/legacy/configs/cityscape_fast_scnn.yaml b/legacy/configs/cityscape_fast_scnn.yaml deleted file mode 100644 index 34bd76be31..0000000000 --- a/legacy/configs/cityscape_fast_scnn.yaml +++ /dev/null @@ -1,53 +0,0 @@ -EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (1024, 1024) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True - FLIP: False - FLIP_RATIO: 0.2 - RICH_CROP: - ENABLE: True - ASPECT_RATIO: 0.0 - BLUR: False - BLUR_RATIO: 0.1 - MAX_ROTATION: 0 - MIN_AREA_RATIO: 0.0 - BRIGHTNESS_JITTER_RATIO: 0.4 - CONTRAST_JITTER_RATIO: 0.4 - SATURATION_JITTER_RATIO: 0.4 -BATCH_SIZE: 12 -MEAN: [0.5, 0.5, 0.5] -STD: [0.5, 0.5, 0.5] -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "fast_scnn" - -TEST: - TEST_MODEL: "snapshots/cityscape_fast_scnn/final/" -TRAIN: - MODEL_SAVE_DIR: "snapshots/cityscape_fast_scnn/" - SNAPSHOT_EPOCH: 10 -SOLVER: - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 100 diff --git a/legacy/configs/deepglobe_road_extraction.yaml b/legacy/configs/deepglobe_road_extraction.yaml deleted file mode 100644 index d6770287a3..0000000000 --- a/legacy/configs/deepglobe_road_extraction.yaml +++ /dev/null @@ -1,45 +0,0 @@ -EVAL_CROP_SIZE: (1025, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling -BATCH_SIZE: 8 -DATASET: - DATA_DIR: "./dataset/MiniDeepGlobeRoadExtraction/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 2 - TEST_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - TRAIN_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/train.txt" - VAL_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - VIS_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - IGNORE_INDEX: 255 - SEPARATOR: '|' -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" - SAVE_DIR: "freeze_model" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - BACKBONE: "mobilenetv2" - DEPTH_MULTIPLIER: 1.0 - ENCODER_WITH_ASPP: False - ENABLE_DECODER: 
False -TEST: - TEST_MODEL: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_deepglobe_road_extraction/final" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_deepglobe_road_extraction/" - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/" - SNAPSHOT_EPOCH: 10 -SOLVER: - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" - NUM_EPOCHS: 300 diff --git a/legacy/configs/deeplabv3p_mobilenet-1-0_pet.yaml b/legacy/configs/deeplabv3p_mobilenet-1-0_pet.yaml deleted file mode 100644 index 7578034ddc..0000000000 --- a/legacy/configs/deeplabv3p_mobilenet-1-0_pet.yaml +++ /dev/null @@ -1,47 +0,0 @@ -TRAIN_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -EVAL_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (512, 512) # (width, height), for unpadding - - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - - MAX_SCALE_FACTOR: 1.25 # for stepscaling - MIN_SCALE_FACTOR: 0.75 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/mini_pet/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 3 - TEST_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - TRAIN_FILE_LIST: "./dataset/mini_pet/file_list/train_list.txt" - VAL_FILE_LIST: "./dataset/mini_pet/file_list/val_list.txt" - VIS_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - MODEL_NAME: "deeplabv3p" - DEFAULT_NORM_TYPE: "bn" - DEEPLAB: - BACKBONE: "mobilenetv2" - DEPTH_MULTIPLIER: 1.0 - ENCODER_WITH_ASPP: False - ENABLE_DECODER: False -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_pet/" - SNAPSHOT_EPOCH: 10 -TEST: - TEST_MODEL: "./saved_model/deeplabv3p_mobilenetv2-1-0_bn_pet/final" -SOLVER: - NUM_EPOCHS: 100 - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" diff --git a/legacy/configs/deeplabv3p_mobilenetv2_cityscapes.yaml b/legacy/configs/deeplabv3p_mobilenetv2_cityscapes.yaml deleted file mode 100644 index 8a7808525d..0000000000 --- a/legacy/configs/deeplabv3p_mobilenetv2_cityscapes.yaml +++ /dev/null @@ -1,47 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - DEFAULT_NORM_TYPE: "bn" - 
MODEL_NAME: "deeplabv3p" - DEEPLAB: - BACKBONE: "mobilenetv2" - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True - ENCODER_WITH_ASPP: False - ENABLE_DECODER: False -TRAIN: - PRETRAINED_MODEL_DIR: u"pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco" - MODEL_SAVE_DIR: "saved_model/deeplabv3p_mobilenetv2_cityscapes" - SNAPSHOT_EPOCH: 10 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "saved_model/deeplabv3p_mobilenetv2_cityscapes/final" -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 100 diff --git a/legacy/configs/deeplabv3p_mobilenetv3_large_cityscapes.yaml b/legacy/configs/deeplabv3p_mobilenetv3_large_cityscapes.yaml deleted file mode 100644 index a844e28c19..0000000000 --- a/legacy/configs/deeplabv3p_mobilenetv3_large_cityscapes.yaml +++ /dev/null @@ -1,58 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 32 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - BACKBONE: "mobilenetv3_large" - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True - ENCODER_WITH_ASPP: True - ENABLE_DECODER: True - OUTPUT_STRIDE: 32 - BACKBONE_LR_MULT_LIST: [0.15,0.35,0.65,0.85,1] - ENCODER: - POOLING_STRIDE: (4, 5) - POOLING_CROP_SIZE: (769, 769) - ASPP_WITH_SE: True - SE_USE_QSIGMOID: True - ASPP_CONVS_FILTERS: 128 - ASPP_WITH_CONCAT_PROJECTION: False - ADD_IMAGE_LEVEL_FEATURE: False - DECODER: - USE_SUM_MERGE: True - CONV_FILTERS: 19 - OUTPUT_IS_LOGITS: True - -TRAIN: - PRETRAINED_MODEL_DIR: u"pretrained_model/mobilenetv3-1-0_large_bn_imagenet" - MODEL_SAVE_DIR: "saved_model/deeplabv3p_mobilenetv3_large_cityscapes" - SNAPSHOT_EPOCH: 1 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "saved_model/deeplabv3p_mobilenetv3_large_cityscapes/final" -SOLVER: - LR: 0.2 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 850 diff --git a/legacy/configs/deeplabv3p_resnet50_vd_cityscapes.yaml b/legacy/configs/deeplabv3p_resnet50_vd_cityscapes.yaml deleted file mode 100644 index 41b39ee978..0000000000 --- a/legacy/configs/deeplabv3p_resnet50_vd_cityscapes.yaml +++ /dev/null @@ -1,47 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True - TO_RGB: True -BATCH_SIZE: 16 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 
- TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True - BACKBONE: "resnet_vd_50" - BACKBONE_LR_MULT_LIST: [0.1, 0.1, 0.2, 0.2, 1.0] -TRAIN: - PRETRAINED_MODEL_DIR: u"pretrained_model/resnet50_vd_imagenet" - MODEL_SAVE_DIR: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes" - SNAPSHOT_EPOCH: 10 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "saved_model/deeplabv3p_resnet50_vd_bn_cityscapes/final" -SOLVER: - LR: 0.05 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 700 diff --git a/legacy/configs/deeplabv3p_xception65_cityscapes.yaml b/legacy/configs/deeplabv3p_xception65_cityscapes.yaml deleted file mode 100644 index 1dce747745..0000000000 --- a/legacy/configs/deeplabv3p_xception65_cityscapes.yaml +++ /dev/null @@ -1,44 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True -TRAIN: - PRETRAINED_MODEL_DIR: u"pretrained_model/deeplabv3p_xception65_bn_coco" - MODEL_SAVE_DIR: "saved_model/deeplabv3p_xception65_bn_cityscapes" - SNAPSHOT_EPOCH: 10 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "saved_model/deeplabv3p_xception65_bn_cityscapes/final" -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 100 diff --git a/legacy/configs/deeplabv3p_xception65_optic.yaml b/legacy/configs/deeplabv3p_xception65_optic.yaml deleted file mode 100644 index 311123cfa6..0000000000 --- a/legacy/configs/deeplabv3p_xception65_optic.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "deeplabv3p" - DEFAULT_NORM_TYPE: "bn" - DEEPLAB: - BACKBONE: "xception_65" - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 4 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_xception65_bn_coco/" - MODEL_SAVE_DIR: 
"./saved_model/deeplabv3p_xception65_bn_optic/" - SNAPSHOT_EPOCH: 5 -TEST: - TEST_MODEL: "./saved_model/deeplabv3p_xception65_bn_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/deeplabv3p_xception65_optic_kunlun.yaml b/legacy/configs/deeplabv3p_xception65_optic_kunlun.yaml deleted file mode 100644 index bad5c9b04c..0000000000 --- a/legacy/configs/deeplabv3p_xception65_optic_kunlun.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "deeplabv3p" - DEFAULT_NORM_TYPE: "bn" - DEEPLAB: - BACKBONE: "xception_65" - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 1 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_xception65_bn_coco/" - MODEL_SAVE_DIR: "./saved_model/deeplabv3p_xception65_bn_optic/" - SNAPSHOT_EPOCH: 2 -TEST: - TEST_MODEL: "./saved_model/deeplabv3p_xception65_bn_optic/final" -SOLVER: - NUM_EPOCHS: 20 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/fast_scnn_pet.yaml b/legacy/configs/fast_scnn_pet.yaml deleted file mode 100644 index 2b9b659f18..0000000000 --- a/legacy/configs/fast_scnn_pet.yaml +++ /dev/null @@ -1,43 +0,0 @@ -TRAIN_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -EVAL_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (512, 512) # (width, height), for unpadding - - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - - MAX_SCALE_FACTOR: 1.25 # for stepscaling - MIN_SCALE_FACTOR: 0.75 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/mini_pet/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 3 - TEST_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - TRAIN_FILE_LIST: "./dataset/mini_pet/file_list/train_list.txt" - VAL_FILE_LIST: "./dataset/mini_pet/file_list/val_list.txt" - VIS_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - MODEL_NAME: "fast_scnn" - DEFAULT_NORM_TYPE: "bn" - -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/fast_scnn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/fast_scnn_pet/" - SNAPSHOT_EPOCH: 10 -TEST: - TEST_MODEL: "./saved_model/fast_scnn_pet/final" -SOLVER: - NUM_EPOCHS: 100 - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" diff --git a/legacy/configs/fcn.yaml b/legacy/configs/fcn.yaml deleted file mode 100644 index 726350b734..0000000000 --- a/legacy/configs/fcn.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "hrnet" - 
DEFAULT_NORM_TYPE: "bn" - HRNET: - STAGE2: - NUM_CHANNELS: [18, 36] - STAGE3: - NUM_CHANNELS: [18, 36, 72] - STAGE4: - NUM_CHANNELS: [18, 36, 72, 144] - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 1 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/hrnet_w18_bn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/hrnet_optic/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "./saved_model/hrnet_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/hrnet_optic.yaml b/legacy/configs/hrnet_optic.yaml deleted file mode 100644 index 7154bceeea..0000000000 --- a/legacy/configs/hrnet_optic.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "hrnet" - DEFAULT_NORM_TYPE: "bn" - HRNET: - STAGE2: - NUM_CHANNELS: [18, 36] - STAGE3: - NUM_CHANNELS: [18, 36, 72] - STAGE4: - NUM_CHANNELS: [18, 36, 72, 144] - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 4 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/hrnet_w18_bn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/hrnet_optic/" - SNAPSHOT_EPOCH: 5 -TEST: - TEST_MODEL: "./saved_model/hrnet_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/icnet_optic.yaml b/legacy/configs/icnet_optic.yaml deleted file mode 100644 index 0f2742e6cf..0000000000 --- a/legacy/configs/icnet_optic.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "icnet" - DEFAULT_NORM_TYPE: "bn" - MULTI_LOSS_WEIGHT: "[1.0, 0.4, 0.16]" - ICNET: - DEPTH_MULTIPLIER: 0.5 - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 4 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/icnet_bn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/icnet_optic/" - SNAPSHOT_EPOCH: 5 -TEST: - TEST_MODEL: "./saved_model/icnet_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml b/legacy/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml deleted file mode 100644 index 45f5fc724a..0000000000 --- a/legacy/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml +++ /dev/null @@ -1,50 +0,0 @@ -EVAL_CROP_SIZE: (1025, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for 
stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - FLIP: True -BATCH_SIZE: 24 -DATASET: - DATA_DIR: "./dataset/MiniDeepGlobeRoadExtraction/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 2 - TEST_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - TRAIN_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/train.txt" - VAL_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - VIS_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" - IGNORE_INDEX: 255 - SEPARATOR: '|' -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" - SAVE_DIR: "freeze_model" -MODEL: - DEFAULT_NORM_TYPE: "bn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - BACKBONE: "mobilenetv2" - DEPTH_MULTIPLIER: 1.0 - ENCODER_WITH_ASPP: False - ENABLE_DECODER: False -TEST: - TEST_MODEL: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/final" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/" - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/" - SNAPSHOT_EPOCH: 10 -SOLVER: - LR: 0.1 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 300 - LOSS: ["lovasz_hinge_loss","bce_loss"] - LOSS_WEIGHT: - LOVASZ_HINGE_LOSS: 0.5 - BCE_LOSS: 0.5 diff --git a/legacy/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml b/legacy/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml deleted file mode 100755 index b1c6ff7990..0000000000 --- a/legacy/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml +++ /dev/null @@ -1,49 +0,0 @@ -TRAIN_CROP_SIZE: (500, 500) # (width, height), for unpadding rangescaling and stepscaling #训练时图像裁剪尺寸(宽,高) -EVAL_CROP_SIZE: (500, 500) # (width, height), for unpadding rangescaling and stepscaling #验证时图像裁剪尺寸(宽,高) -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (500, 500) # (width, height), for unpadding - - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - - MAX_SCALE_FACTOR: 1.25 # for stepscaling - MIN_SCALE_FACTOR: 0.75 # for stepscaling - SCALE_STEP_SIZE: 0.05 # for stepscaling - MIRROR: True - FLIP: True -BATCH_SIZE: 16 #批处理大小 -DATASET: - DATA_DIR: "./dataset/VOCtrainval_11-May-2012/VOC2012/" #图片路径 - IMAGE_TYPE: "rgb" # choice rgb or rgba #图片类别“RGB” - NUM_CLASSES: 21 #类别数(包括背景类别) - TEST_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list" - TRAIN_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/train.list" - VAL_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list" - VIS_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -MODEL: - MODEL_NAME: "deeplabv3p" - DEFAULT_NORM_TYPE: "bn" #指定norm的类型,此处提供bn和gn(默认)两种选择,分别指batch norm和group norm。 - DEEPLAB: - BACKBONE: "mobilenetv2" - DEPTH_MULTIPLIER: 1.0 - ENCODER_WITH_ASPP: False - ENABLE_DECODER: False -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/" - MODEL_SAVE_DIR: "./saved_model/lovasz-softmax-voc" #模型保存路径 - SNAPSHOT_EPOCH: 10 -TEST: - TEST_MODEL: "./saved_model/lovasz-softmax-voc/final" #为测试模型路径 -SOLVER: - NUM_EPOCHS: 100 #训练epoch数,正整数 - LR: 0.0001 #初始学习率 - LR_POLICY: "poly" #学习率下降方法, 选项为poly、piecewise和cosine - OPTIMIZER: "sgd" #优化算法, 选项为sgd和adam - LOSS: ["lovasz_softmax_loss","softmax_loss"] - LOSS_WEIGHT: - LOVASZ_SOFTMAX_LOSS: 0.2 - SOFTMAX_LOSS: 0.8 diff --git 
a/legacy/configs/ocrnet_w18_bn_cityscapes.yaml b/legacy/configs/ocrnet_w18_bn_cityscapes.yaml deleted file mode 100644 index 15fb92ad5a..0000000000 --- a/legacy/configs/ocrnet_w18_bn_cityscapes.yaml +++ /dev/null @@ -1,54 +0,0 @@ -EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: -# AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (1024, 512) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -#BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "./dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "./dataset/cityscapes/train.list" - VAL_FILE_LIST: "./dataset/cityscapes/val.list" - VIS_FILE_LIST: "./dataset/cityscapes/val.list" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "model" - PARAMS_FILENAME: "params" -MODEL: - MODEL_NAME: "ocrnet" - DEFAULT_NORM_TYPE: "bn" - HRNET: - STAGE2: - NUM_CHANNELS: [18, 36] - STAGE3: - NUM_CHANNELS: [18, 36, 72] - STAGE4: - NUM_CHANNELS: [18, 36, 72, 144] - OCR: - OCR_MID_CHANNELS: 512 - OCR_KEY_CHANNELS: 256 - MULTI_LOSS_WEIGHT: [1.0, 1.0] -TRAIN: - PRETRAINED_MODEL_DIR: u"./pretrained_model/ocrnet_w18_cityscape/best_model" - MODEL_SAVE_DIR: "output/ocrnet_w18_bn_cityscapes" - SNAPSHOT_EPOCH: 1 - SYNC_BATCH_NORM: True -TEST: - TEST_MODEL: "output/ocrnet_w18_bn_cityscapes/first" -SOLVER: - LR: 0.01 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - NUM_EPOCHS: 500 diff --git a/legacy/configs/pspnet_optic.yaml b/legacy/configs/pspnet_optic.yaml deleted file mode 100644 index 589e2b53cc..0000000000 --- a/legacy/configs/pspnet_optic.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "pspnet" - DEFAULT_NORM_TYPE: "bn" - PSPNET: - DEPTH_MULTIPLIER: 1 - LAYERS: 50 - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 4 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/pspnet50_bn_cityscapes/" - MODEL_SAVE_DIR: "./saved_model/pspnet_optic/" - SNAPSHOT_EPOCH: 5 -TEST: - TEST_MODEL: "./saved_model/pspnet_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/configs/unet_optic.yaml b/legacy/configs/unet_optic.yaml deleted file mode 100644 index cd564817c7..0000000000 --- a/legacy/configs/unet_optic.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/optic_disc_seg/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - TRAIN_FILE_LIST: "./dataset/optic_disc_seg/train_list.txt" - VAL_FILE_LIST: "./dataset/optic_disc_seg/val_list.txt" - VIS_FILE_LIST: "./dataset/optic_disc_seg/test_list.txt" - -# 预训练模型配置 -MODEL: - MODEL_NAME: "unet" - 
DEFAULT_NORM_TYPE: "bn" - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "unpadding" - FIX_RESIZE_SIZE: (512, 512) -BATCH_SIZE: 4 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/unet_bn_coco/" - MODEL_SAVE_DIR: "./saved_model/unet_optic/" - SNAPSHOT_EPOCH: 5 -TEST: - TEST_MODEL: "./saved_model/unet_optic/final" -SOLVER: - NUM_EPOCHS: 10 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/contrib/ACE2P/README.md b/legacy/contrib/ACE2P/README.md deleted file mode 100644 index 4c47b6b63a..0000000000 --- a/legacy/contrib/ACE2P/README.md +++ /dev/null @@ -1,101 +0,0 @@ -# Augmented Context Embedding with Edge Perceiving(ACE2P) - -## 模型概述 -人体解析(Human Parsing)是细粒度的语义分割任务,旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。Augmented Context Embedding with Edge Perceiving (ACE2P)通过融合底层特征、全局上下文信息和边缘细节,端到端训练学习人体解析任务。以ACE2P单人人体解析网络为基础的解决方案在CVPR2019第三届Look into Person (LIP)挑战赛中赢得了全部三个人体解析任务的第一名。 - -## 模型框架图 -![](imgs/net.jpg) - -## 模型细节 -ACE2P模型包含三个分支: -* 语义分割分支 -* 边缘检测分支 -* 融合分支 - -语义分割分支采用resnet101作为backbone,通过Pyramid Scene Parsing Network融合上下文信息以获得更加精确的特征表征 - -边缘检测分支采用backbone的中间层特征作为输入,预测二值边缘信息 - -融合分支将语义分割分支以及边缘检测分支的特征进行融合,以获得边缘细节更加准确的分割图像。 - -分割问题一般采用mIoU作为评价指标,特别引入了IoU loss结合cross-entropy loss以针对性优化这一指标 - -测试阶段,采用多尺度以及水平翻转的结果进行融合生成最终预测结果 - -训练阶段,采用余弦退火的学习率策略, 并且在学习初始阶段采用线性warm up - -数据预处理方面,保持图片比例并进行随机缩放,随机旋转,水平翻转作为数据增强策略 - -## LIP指标 - -该模型在测试尺度为'377,377,473,473,567,567'且水平翻转的情况下,meanIoU为62.63 - -多模型ensemble后meanIoU为65.18, 居LIP Single-Person Human Parsing Track榜单第一 - - -## 模型预测效果展示 - -![](imgs/result.jpg) - -人体解析(Human Parsing)是细粒度的语义分割任务,旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。本章节使用冠军模型Augmented Context Embedding with Edge Perceiving (ACE2P)进行预测分割。 - -## 代码使用说明 - -### 1. 模型下载 - -执行以下命令下载并解压ACE2P预测模型: - -``` -python download_ACE2P.py -``` - -或点击[链接](https://paddleseg.bj.bcebos.com/models/ACE2P.tgz)进行手动下载, 并在contrib/ACE2P下解压。 - -### 2. 数据下载 - -测试图片共10000张, -点击 [Baidu_Drive](https://pan.baidu.com/s/1nvqmZBN#list/path=%2Fsharelink2787269280-523292635003760%2FLIP%2FLIP&parentPath=%2Fsharelink2787269280-523292635003760) -下载Testing_images.zip,或前往LIP数据集官网进行下载。 -下载后解压到./data文件夹下 - - -### 3. 快速预测 - -使用GPU预测 -``` -python -u infer.py --example ACE2P --use_gpu -``` - -使用CPU预测: -``` -python -u infer.py --example ACE2P -``` - -**NOTE:** 运行该模型需要2G左右显存。由于数据图片较多,预测过程将比较耗时。 - -#### 4. 预测结果示例: - - 原图: - - ![](imgs/117676_2149260.jpg) - - 预测结果: - - ![](imgs/117676_2149260.png) - -### 备注 - -1. 数据及模型路径等详细配置见ACE2P/HumanSeg/RoadLine下的config.py文件 -2. ACE2P模型需预留2G显存,若显存超可调小FLAGS_fraction_of_gpu_memory_to_use - -## 引用 - -**论文** - -*Devil in the Details: Towards Accurate Single and Multiple Human Parsing* https://arxiv.org/abs/1809.05996 - -**代码** - -https://github.com/Microsoft/human-pose-estimation.pytorch - -https://github.com/liutinglt/CE2P diff --git a/legacy/contrib/ACE2P/__init__.py b/legacy/contrib/ACE2P/__init__.py deleted file mode 100644 index 0e7c9f3954..0000000000 --- a/legacy/contrib/ACE2P/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/legacy/contrib/ACE2P/config.py b/legacy/contrib/ACE2P/config.py
deleted file mode 100644
index 68aeca0243..0000000000
--- a/legacy/contrib/ACE2P/config.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf8
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from utils.util import AttrDict, merge_cfg_from_args, get_arguments
-import os
-
-args = get_arguments()
-cfg = AttrDict()
-
-# 待预测图像所在路径
-cfg.data_dir = os.path.join("data", "testing_images")
-# 待预测图像名称列表
-cfg.data_list_file = os.path.join("data", "test_id.txt")
-# 模型加载路径
-cfg.model_path = args.example
-# 预测结果保存路径
-cfg.vis_dir = "result"
-
-# 预测类别数
-cfg.class_num = 20
-# 均值, 图像预处理减去的均值
-cfg.MEAN = 0.406, 0.456, 0.485
-# 标准差,图像预处理除以标准差
-cfg.STD = 0.225, 0.224, 0.229
-
-# 多尺度预测时图像尺寸
-cfg.multi_scales = (377, 377), (473, 473), (567, 567)
-# 多尺度预测时图像是否水平翻转
-cfg.flip = True
-
-merge_cfg_from_args(args, cfg)
diff --git a/legacy/contrib/ACE2P/download_ACE2P.py b/legacy/contrib/ACE2P/download_ACE2P.py
deleted file mode 100644
index 419e93d4ab..0000000000
--- a/legacy/contrib/ACE2P/download_ACE2P.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# coding: utf8
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import os
-
-LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
-TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test")
-sys.path.append(TEST_PATH)
-
-from test_utils import download_file_and_uncompress
-
-if __name__ == "__main__":
-    download_file_and_uncompress(
-        url='https://paddleseg.bj.bcebos.com/models/ACE2P.tgz',
-        savepath=LOCAL_PATH,
-        extrapath=LOCAL_PATH,
-        extraname='ACE2P')
-
-    print("Pretrained Model download success!")
diff --git a/legacy/contrib/ACE2P/imgs/117676_2149260.jpg b/legacy/contrib/ACE2P/imgs/117676_2149260.jpg
deleted file mode 100644
index 8314d8f8cc..0000000000
Binary files a/legacy/contrib/ACE2P/imgs/117676_2149260.jpg and /dev/null differ
diff --git a/legacy/contrib/ACE2P/imgs/117676_2149260.png b/legacy/contrib/ACE2P/imgs/117676_2149260.png
deleted file mode 100644
index e3a9529644..0000000000
Binary files a/legacy/contrib/ACE2P/imgs/117676_2149260.png and /dev/null differ
diff --git a/legacy/contrib/ACE2P/imgs/net.jpg b/legacy/contrib/ACE2P/imgs/net.jpg
deleted file mode 100644
index 1893315f74..0000000000
Binary files a/legacy/contrib/ACE2P/imgs/net.jpg and /dev/null differ
diff --git a/legacy/contrib/ACE2P/imgs/result.jpg b/legacy/contrib/ACE2P/imgs/result.jpg
deleted file mode 100644
index f9d0d6e224..0000000000
Binary files a/legacy/contrib/ACE2P/imgs/result.jpg and /dev/null differ
diff --git a/legacy/contrib/ACE2P/infer.py b/legacy/contrib/ACE2P/infer.py
deleted file mode 100644
index 838140e37f..0000000000
--- a/legacy/contrib/ACE2P/infer.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# coding: utf8
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import os -import cv2 -import numpy as np -from utils.util import get_arguments -from utils.palette import get_palette -from PIL import Image as PILImage -import importlib - -args = get_arguments() -config = importlib.import_module('config') -cfg = getattr(config, 'cfg') - -# paddle垃圾回收策略FLAG,ACE2P模型较大,当显存不够时建议开启 -os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0' - -import paddle.fluid as fluid - - -# 预测数据集类 -class TestDataSet(): - def __init__(self): - self.data_dir = cfg.data_dir - self.data_list_file = cfg.data_list_file - self.data_list = self.get_data_list() - self.data_num = len(self.data_list) - - def get_data_list(self): - # 获取预测图像路径列表 - data_list = [] - data_file_handler = open(self.data_list_file, 'r') - for line in data_file_handler: - img_name = line.strip() - name_prefix = img_name.split('.')[0] - if len(img_name.split('.')) == 1: - img_name = img_name + '.jpg' - img_path = os.path.join(self.data_dir, img_name) - data_list.append(img_path) - return data_list - - def preprocess(self, img): - # 图像预处理 - if cfg.example == 'ACE2P': - reader = importlib.import_module('reader') - ACE2P_preprocess = getattr(reader, 'preprocess') - img = ACE2P_preprocess(img) - else: - img = cv2.resize(img, cfg.input_size).astype(np.float32) - img -= np.array(cfg.MEAN) - img /= np.array(cfg.STD) - img = img.transpose((2, 0, 1)) - img = np.expand_dims(img, axis=0) - return img - - def get_data(self, index): - # 获取图像信息 - img_path = self.data_list[index] - img = cv2.imread(img_path, cv2.IMREAD_COLOR) - if img is None: - return img, img, img_path, None - - img_name = img_path.split(os.sep)[-1] - name_prefix = img_name.replace('.' + img_name.split('.')[-1], '') - img_shape = img.shape[:2] - img_process = self.preprocess(img) - - return img, img_process, name_prefix, img_shape - - -def infer(): - if not os.path.exists(cfg.vis_dir): - os.makedirs(cfg.vis_dir) - palette = get_palette(cfg.class_num) - # 人像分割结果显示阈值 - thresh = 120 - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - - # 加载预测模型 - test_prog, feed_name, fetch_list = fluid.io.load_inference_model( - dirname=cfg.model_path, executor=exe, params_filename='__params__') - - #加载预测数据集 - test_dataset = TestDataSet() - data_num = test_dataset.data_num - - for idx in range(data_num): - # 数据获取 - ori_img, image, im_name, im_shape = test_dataset.get_data(idx) - if image is None: - print(im_name, 'is None') - continue - - # 预测 - if cfg.example == 'ACE2P': - # ACE2P模型使用多尺度预测 - reader = importlib.import_module('reader') - multi_scale_test = getattr(reader, 'multi_scale_test') - parsing, logits = multi_scale_test(exe, test_prog, feed_name, - fetch_list, image, im_shape) - else: - # HumanSeg,RoadLine模型单尺度预测 - result = exe.run( - program=test_prog, - feed={feed_name[0]: image}, - fetch_list=fetch_list) - parsing = np.argmax(result[0][0], axis=0) - parsing = cv2.resize(parsing.astype(np.uint8), im_shape[::-1]) - - # 预测结果保存 - result_path = os.path.join(cfg.vis_dir, im_name + '.png') - if cfg.example == 'HumanSeg': - logits = result[0][0][1] * 255 - logits = cv2.resize(logits, im_shape[::-1]) - ret, logits = cv2.threshold(logits, thresh, 0, cv2.THRESH_TOZERO) - logits = 255 * (logits - thresh) / (255 - thresh) - # 将分割结果添加到alpha通道 - rgba = np.concatenate((ori_img, np.expand_dims(logits, axis=2)), - axis=2) - cv2.imwrite(result_path, rgba) - else: - output_im = PILImage.fromarray(np.asarray(parsing, dtype=np.uint8)) - output_im.putpalette(palette) - output_im.save(result_path) - - if (idx + 1) % 100 == 0: - print('%d 
processd' % (idx + 1)) - - print('%d processd done' % (idx + 1)) - - return 0 - - -if __name__ == "__main__": - infer() diff --git a/legacy/contrib/ACE2P/reader.py b/legacy/contrib/ACE2P/reader.py deleted file mode 100644 index 892f45ad25..0000000000 --- a/legacy/contrib/ACE2P/reader.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import paddle.fluid as fluid -from config import cfg -import cv2 - - -def get_affine_points(src_shape, dst_shape, rot_grad=0): - # 获取图像和仿射后图像的三组对应点坐标 - # 三组点为仿射变换后图像的中心点, [w/2,0], [0,0],及对应原始图像的点 - if dst_shape[0] == 0 or dst_shape[1] == 0: - raise Exception('scale shape should not be 0') - - # 旋转角度 - rotation = rot_grad * np.pi / 180.0 - sin_v = np.sin(rotation) - cos_v = np.cos(rotation) - - dst_ratio = float(dst_shape[0]) / dst_shape[1] - h, w = src_shape - src_ratio = float(h) / w if w != 0 else 0 - affine_shape = [h, h * dst_ratio] if src_ratio > dst_ratio \ - else [w / dst_ratio, w] - - # 原始图像三组点 - points = [[0, 0]] * 3 - points[0] = (np.array([w, h]) - 1) * 0.5 - points[1] = points[0] + 0.5 * affine_shape[0] * np.array([sin_v, -cos_v]) - points[2] = points[1] - 0.5 * affine_shape[1] * np.array([cos_v, sin_v]) - - # 仿射变换后图三组点 - points_trans = [[0, 0]] * 3 - points_trans[0] = (np.array(dst_shape[::-1]) - 1) * 0.5 - points_trans[1] = [points_trans[0][0], 0] - - return points, points_trans - - -def preprocess(im): - # ACE2P模型数据预处理 - im_shape = im.shape[:2] - input_images = [] - for i, scale in enumerate(cfg.multi_scales): - # 获取图像和仿射变换后图像的对应点坐标 - points, points_trans = get_affine_points(im_shape, scale) - # 根据对应点集获得仿射矩阵 - trans = cv2.getAffineTransform( - np.float32(points), np.float32(points_trans)) - # 根据仿射矩阵对图像进行仿射 - input = cv2.warpAffine(im, trans, scale[::-1], flags=cv2.INTER_LINEAR) - - # 减均值测,除以方差,转换数据格式为NCHW - input = input.astype(np.float32) - input = (input / 255. 
- np.array(cfg.MEAN)) / np.array(cfg.STD) - input = input.transpose(2, 0, 1).astype(np.float32) - input = np.expand_dims(input, 0) - - # 水平翻转 - if cfg.flip: - flip_input = input[:, :, :, ::-1] - input_images.append(np.vstack((input, flip_input))) - else: - input_images.append(input) - - return input_images - - -def multi_scale_test(exe, test_prog, feed_name, fetch_list, input_ims, - im_shape): - - # 由于部分类别分左右部位, flipped_idx为其水平翻转后对应的标签 - flipped_idx = (15, 14, 17, 16, 19, 18) - ms_outputs = [] - - # 多尺度预测 - for idx, scale in enumerate(cfg.multi_scales): - input_im = input_ims[idx] - parsing_output = exe.run( - program=test_prog, - feed={feed_name[0]: input_im}, - fetch_list=fetch_list) - output = parsing_output[0][0] - if cfg.flip: - # 若水平翻转,对部分类别进行翻转,与原始预测结果取均值 - flipped_output = parsing_output[0][1] - flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :] - flipped_output = flipped_output[:, :, ::-1] - output += flipped_output - output *= 0.5 - - output = np.transpose(output, [1, 2, 0]) - # 仿射变换回图像原始尺寸 - points, points_trans = get_affine_points(im_shape, scale) - M = cv2.getAffineTransform(np.float32(points_trans), np.float32(points)) - logits_result = cv2.warpAffine( - output, M, im_shape[::-1], flags=cv2.INTER_LINEAR) - ms_outputs.append(logits_result) - - # 多尺度预测结果求均值,求预测概率最大的类别 - ms_fused_parsing_output = np.stack(ms_outputs) - ms_fused_parsing_output = np.mean(ms_fused_parsing_output, axis=0) - parsing = np.argmax(ms_fused_parsing_output, axis=2) - return parsing, ms_fused_parsing_output diff --git a/legacy/contrib/ACE2P/utils/__init__.py b/legacy/contrib/ACE2P/utils/__init__.py deleted file mode 100644 index 0e7c9f3954..0000000000 --- a/legacy/contrib/ACE2P/utils/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/ACE2P/utils/palette.py b/legacy/contrib/ACE2P/utils/palette.py deleted file mode 100644 index f7f15441b2..0000000000 --- a/legacy/contrib/ACE2P/utils/palette.py +++ /dev/null @@ -1,39 +0,0 @@ -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -## Created by: RainbowSecret -## Microsoft Research -## yuyua@microsoft.com -## Copyright (c) 2018 -## -## This source code is licensed under the MIT-style license found in the -## LICENSE file in the root directory of this source tree -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import numpy as np -import cv2 - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. 
- Args: - num_cls: Number of classes - Returns: - The color map - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette diff --git a/legacy/contrib/ACE2P/utils/util.py b/legacy/contrib/ACE2P/utils/util.py deleted file mode 100644 index 955eb02b63..0000000000 --- a/legacy/contrib/ACE2P/utils/util.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import argparse -import os - - -def get_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--use_gpu", action="store_true", help="Use gpu or cpu to test.") - parser.add_argument( - '--example', type=str, help='RoadLine, HumanSeg or ACE2P') - - return parser.parse_args() - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - - def __getattr__(self, name): - if name in self.__dict__: - return self.__dict__[name] - elif name in self: - return self[name] - else: - raise AttributeError(name) - - def __setattr__(self, name, value): - if name in self.__dict__: - self.__dict__[name] = value - else: - self[name] = value - - -def merge_cfg_from_args(args, cfg): - """Merge config keys, values in args into the global config.""" - for k, v in vars(args).items(): - d = cfg - try: - value = eval(v) - except: - value = v - if value is not None: - cfg[k] = value diff --git a/legacy/contrib/HumanSeg/README.md b/legacy/contrib/HumanSeg/README.md deleted file mode 100644 index 2a455e2fc3..0000000000 --- a/legacy/contrib/HumanSeg/README.md +++ /dev/null @@ -1,211 +0,0 @@ -# HumanSeg人像分割模型 - -本教程基于PaddleSeg核心分割网络,提供针对人像分割场景从预训练模型、Fine-tune、视频分割预测部署的全流程应用指南。最新发布HumanSeg-lite模型超轻量级人像分割模型,支持移动端场景的实时分割。 - -## 环境依赖 - -* Python == 3.5/3.6/3.7 -* PaddlePaddle >= 1.7.2 - -PaddlePaddle的安装可参考[飞桨快速安装](https://www.paddlepaddle.org.cn/install/quick) - -通过以下命令安装python包依赖,请确保在该分支上至少执行过一次以下命令 -```shell -$ pip install -r requirements.txt -``` - -## 预训练模型 -HumanSeg开放了在大规模人像数据上训练的三个预训练模型,满足多种使用场景的需求 - -| 模型类型 | Checkpoint | Inference Model | Quant Inference Model | 备注 | -| --- | --- | --- | ---| --- | -| HumanSeg-server | [humanseg_server_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_ckpt.zip) | [humanseg_server_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip) | -- | 高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 输入大小(512, 512) | -| HumanSeg-mobile | [humanseg_mobile_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_ckpt.zip) | 
[humanseg_mobile_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip) | [humanseg_mobile_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip) | 轻量级模型, 适用于移动端或服务端CPU的前置摄像头场景,模型结构为HRNet_w18_samll_v1,输入大小(192, 192) | -| HumanSeg-lite | [humanseg_lite_ckpt](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_ckpt.zip) | [humanseg_lite_inference](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_inference.zip) | [humanseg_lite_quant](https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_quant.zip) | 超轻量级模型, 适用于手机自拍人像,且有移动端实时分割场景, 模型结构为优化的ShuffleNetV2,输入大小(192, 192) | - - -模型性能 - -| 模型 | 模型大小 | 计算耗时 | -| --- | --- | --- | -|humanseg_server_inference| 158M | - | -|humanseg_mobile_inference | 5.8 M | 42.35ms | -|humanseg_mobile_quant | 1.6M | 24.93ms | -|humanseg_lite_inference | 541K | 17.26ms | -|humanseg_lite_quant | 187k | 11.89ms | - -计算耗时运行环境: 小米,cpu:骁龙855, 内存:6GB, 图片大小:192*192) - - -**NOTE:** -其中Checkpoint为模型权重,用于Fine-tuning场景。 - -* Inference Model和Quant Inference Model为预测部署模型,包含`__model__`计算图结构、`__params__`模型参数和`model.yaml`基础的模型配置信息。 - -* 其中Inference Model适用于服务端的CPU和GPU预测部署,Qunat Inference Model为量化版本,适用于通过Paddle Lite进行移动端等端侧设备部署。更多Paddle Lite部署说明查看[Paddle Lite文档](https://paddle-lite.readthedocs.io/zh/latest/) - -执行以下脚本进行HumanSeg预训练模型的下载 -```bash -python pretrained_weights/download_pretrained_weights.py -``` - -## 下载测试数据 -我们提供了[supervise.ly](https://supervise.ly/)发布人像分割数据集**Supervisely Persons**, 从中随机抽取一小部分并转化成PaddleSeg可直接加载数据格式。通过运行以下代码进行快速下载,其中包含手机前置摄像头的人像测试视频`video_test.mp4`. - -```bash -python data/download_data.py -``` - -## 快速体验视频流人像分割 -结合DIS(Dense Inverse Search-basedmethod)光流算法预测结果与分割结果,改善视频流人像分割 -```bash -# 通过电脑摄像头进行实时分割处理 -python video_infer.py --model_dir pretrained_weights/humanseg_lite_inference - -# 对人像视频进行分割处理 -python video_infer.py --model_dir pretrained_weights/humanseg_lite_inference --video_path data/video_test.mp4 -``` - -视频分割结果如下: - - - -根据所选背景进行背景替换,背景可以是一张图片,也可以是一段视频。 -```bash -# 通过电脑摄像头进行实时背景替换处理, 也可通过'--background_video_path'传入背景视频 -python bg_replace.py --model_dir pretrained_weights/humanseg_lite_inference --background_image_path data/background.jpg - -# 对人像视频进行背景替换处理, 也可通过'--background_video_path'传入背景视频 -python bg_replace.py --model_dir pretrained_weights/humanseg_lite_inference --video_path data/video_test.mp4 --background_image_path data/background.jpg - -# 对单张图像进行背景替换 -python bg_replace.py --model_dir pretrained_weights/humanseg_lite_inference --image_path data/human_image.jpg --background_image_path data/background.jpg - -``` - -背景替换结果如下: - - - - -**NOTE**: - -视频分割处理时间需要几分钟,请耐心等待。 - -提供的模型适用于手机摄像头竖屏拍摄场景,宽屏效果会略差一些。 - -## 训练 -使用下述命令基于与训练模型进行Fine-tuning,请确保选用的模型结构`model_type`与模型参数`pretrained_weights`匹配。 -```bash -python train.py --model_type HumanSegMobile \ ---save_dir output/ \ ---data_dir data/mini_supervisely \ ---train_list data/mini_supervisely/train.txt \ ---val_list data/mini_supervisely/val.txt \ ---pretrained_weights pretrained_weights/humanseg_mobile_ckpt \ ---batch_size 8 \ ---learning_rate 0.001 \ ---num_epochs 10 \ ---image_shape 192 192 -``` -其中参数含义如下: -* `--model_type`: 模型类型,可选项为:HumanSegServer、HumanSegMobile和HumanSegLite -* `--save_dir`: 模型保存路径 -* `--data_dir`: 数据集路径 -* `--train_list`: 训练集列表路径 -* `--val_list`: 验证集列表路径 -* `--pretrained_weights`: 预训练模型路径 -* `--batch_size`: 批大小 -* `--learning_rate`: 初始学习率 -* `--num_epochs`: 训练轮数 -* `--image_shape`: 网络输入图像大小(w, h) - -更多命令行帮助可运行下述命令进行查看: -```bash -python train.py --help -``` -**NOTE** 
-可通过更换`--model_type`变量与对应的`--pretrained_weights`使用不同的模型快速尝试。 - -## 评估 -使用下述命令进行评估 -```bash -python val.py --model_dir output/best_model \ ---data_dir data/mini_supervisely \ ---val_list data/mini_supervisely/val.txt \ ---image_shape 192 192 -``` -其中参数含义如下: -* `--model_dir`: 模型路径 -* `--data_dir`: 数据集路径 -* `--val_list`: 验证集列表路径 -* `--image_shape`: 网络输入图像大小(w, h) - -## 预测 -使用下述命令进行预测, 预测结果默认保存在`./output/result/`文件夹中。 -```bash -python infer.py --model_dir output/best_model \ ---data_dir data/mini_supervisely \ ---test_list data/mini_supervisely/test.txt \ ---save_dir output/result \ ---image_shape 192 192 -``` -其中参数含义如下: -* `--model_dir`: 模型路径 -* `--data_dir`: 数据集路径 -* `--test_list`: 测试集列表路径 -* `--image_shape`: 网络输入图像大小(w, h) - -## 模型导出 -```bash -python export.py --model_dir output/best_model \ ---save_dir output/export -``` -其中参数含义如下: -* `--model_dir`: 模型路径 -* `--save_dir`: 导出模型保存路径 - -## 离线量化 -```bash -python quant_offline.py --model_dir output/best_model \ ---data_dir data/mini_supervisely \ ---quant_list data/mini_supervisely/val.txt \ ---save_dir output/quant_offline \ ---image_shape 192 192 -``` -其中参数含义如下: -* `--model_dir`: 待量化模型路径 -* `--data_dir`: 数据集路径 -* `--quant_list`: 量化数据集列表路径,一般直接选择训练集或验证集 -* `--save_dir`: 量化模型保存路径 -* `--image_shape`: 网络输入图像大小(w, h) - -## 在线量化 -利用float训练模型进行在线量化。 -```bash -python quant_online.py --model_type HumanSegMobile \ ---save_dir output/quant_online \ ---data_dir data/mini_supervisely \ ---train_list data/mini_supervisely/train.txt \ ---val_list data/mini_supervisely/val.txt \ ---pretrained_weights output/best_model \ ---batch_size 2 \ ---learning_rate 0.001 \ ---num_epochs 2 \ ---image_shape 192 192 -``` -其中参数含义如下: -* `--model_type`: 模型类型,可选项为:HumanSegServer、HumanSegMobile和HumanSegLite -* `--save_dir`: 模型保存路径 -* `--data_dir`: 数据集路径 -* `--train_list`: 训练集列表路径 -* `--val_list`: 验证集列表路径 -* `--pretrained_weights`: 预训练模型路径, -* `--batch_size`: 批大小 -* `--learning_rate`: 初始学习率 -* `--num_epochs`: 训练轮数 -* `--image_shape`: 网络输入图像大小(w, h) - -## AIStudio在线教程 - -我们在AI Studio平台上提供了人像分割在线体验的教程,[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/475345) diff --git a/legacy/contrib/HumanSeg/bg_replace.py b/legacy/contrib/HumanSeg/bg_replace.py deleted file mode 100644 index 3dfd3e2366..0000000000 --- a/legacy/contrib/HumanSeg/bg_replace.py +++ /dev/null @@ -1,290 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import os.path as osp -import cv2 -import numpy as np - -from utils.humanseg_postprocess import postprocess, threshold_mask -import models -import transforms - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg inference for video') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for inference', - type=str) - parser.add_argument( - '--image_path', - dest='image_path', - help='Image including human', - type=str, - default=None) - parser.add_argument( - '--background_image_path', - dest='background_image_path', - help='Background image for replacing', - type=str, - default=None) - parser.add_argument( - '--video_path', - dest='video_path', - help='Video path for inference', - type=str, - default=None) - parser.add_argument( - '--background_video_path', - dest='background_video_path', - help='Background video path for replacing', - type=str, - default=None) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the inference results', - type=str, - default='./output') - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - - return parser.parse_args() - - -def predict(img, model, test_transforms): - model.arrange_transform(transforms=test_transforms, mode='test') - img, im_info = test_transforms(img) - img = np.expand_dims(img, axis=0) - result = model.exe.run( - model.test_prog, - feed={'image': img}, - fetch_list=list(model.test_outputs.values())) - score_map = result[1] - score_map = np.squeeze(score_map, axis=0) - score_map = np.transpose(score_map, (1, 2, 0)) - return score_map, im_info - - -def recover(img, im_info): - keys = list(im_info.keys()) - for k in keys[::-1]: - if k == 'shape_before_resize': - h, w = im_info[k][0], im_info[k][1] - img = cv2.resize(img, (w, h), cv2.INTER_LINEAR) - elif k == 'shape_before_padding': - h, w = im_info[k][0], im_info[k][1] - img = img[0:h, 0:w] - return img - - -def bg_replace(score_map, img, bg): - h, w, _ = img.shape - bg = cv2.resize(bg, (w, h)) - score_map = np.repeat(score_map[:, :, np.newaxis], 3, axis=2) - comb = (score_map * img + (1 - score_map) * bg).astype(np.uint8) - return comb - - -def infer(args): - resize_h = args.image_shape[1] - resize_w = args.image_shape[0] - - test_transforms = transforms.Compose( - [transforms.Resize((resize_w, resize_h)), - transforms.Normalize()]) - model = models.load_model(args.model_dir) - - if not osp.exists(args.save_dir): - os.makedirs(args.save_dir) - - # 图像背景替换 - if args.image_path is not None: - if not osp.exists(args.image_path): - raise Exception('The --image_path is not existed: {}'.format( - args.image_path)) - if args.background_image_path is None: - raise Exception( - 'The --background_image_path is not set. 
Please set it') - else: - if not osp.exists(args.background_image_path): - raise Exception( - 'The --background_image_path is not existed: {}'.format( - args.background_image_path)) - img = cv2.imread(args.image_path) - score_map, im_info = predict(img, model, test_transforms) - score_map = score_map[:, :, 1] - score_map = recover(score_map, im_info) - bg = cv2.imread(args.background_image_path) - save_name = osp.basename(args.image_path) - save_path = osp.join(args.save_dir, save_name) - result = bg_replace(score_map, img, bg) - cv2.imwrite(save_path, result) - - # 视频背景替换,如果提供背景视频则以背景视频作为背景,否则采用提供的背景图片 - else: - is_video_bg = False - if args.background_video_path is not None: - if not osp.exists(args.background_video_path): - raise Exception( - 'The --background_video_path is not existed: {}'.format( - args.background_video_path)) - is_video_bg = True - elif args.background_image_path is not None: - if not osp.exists(args.background_image_path): - raise Exception( - 'The --background_image_path is not existed: {}'.format( - args.background_image_path)) - else: - raise Exception( - 'Please offer backgound image or video. You should set --backbground_iamge_paht or --background_video_path' - ) - - disflow = cv2.DISOpticalFlow_create( - cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) - prev_gray = np.zeros((resize_h, resize_w), np.uint8) - prev_cfd = np.zeros((resize_h, resize_w), np.float32) - is_init = True - if args.video_path is not None: - print('Please wait. It is computing......') - if not osp.exists(args.video_path): - raise Exception('The --video_path is not existed: {}'.format( - args.video_path)) - - cap_video = cv2.VideoCapture(args.video_path) - fps = cap_video.get(cv2.CAP_PROP_FPS) - width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - save_name = osp.basename(args.video_path) - save_name = save_name.split('.')[0] - save_path = osp.join(args.save_dir, save_name + '.avi') - - cap_out = cv2.VideoWriter( - save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, - (width, height)) - - if is_video_bg: - cap_bg = cv2.VideoCapture(args.background_video_path) - frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT) - current_frame_bg = 1 - else: - img_bg = cv2.imread(args.background_image_path) - while cap_video.isOpened(): - ret, frame = cap_video.read() - if ret: - score_map, im_info = predict(frame, model, test_transforms) - cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) - score_map = 255 * score_map[:, :, 1] - optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \ - disflow, is_init) - prev_gray = cur_gray.copy() - prev_cfd = optflow_map.copy() - is_init = False - optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) - optflow_map = threshold_mask( - optflow_map, thresh_bg=0.2, thresh_fg=0.8) - score_map = recover(optflow_map, im_info) - - #循环读取背景帧 - if is_video_bg: - ret_bg, frame_bg = cap_bg.read() - if ret_bg: - if current_frame_bg == frames_bg: - current_frame_bg = 1 - cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0) - else: - break - current_frame_bg += 1 - comb = bg_replace(score_map, frame, frame_bg) - else: - comb = bg_replace(score_map, frame, img_bg) - - cap_out.write(comb) - else: - break - - if is_video_bg: - cap_bg.release() - cap_video.release() - cap_out.release() - - # 当没有输入预测图像和视频的时候,则打开摄像头 - else: - cap_video = cv2.VideoCapture(0) - if not cap_video.isOpened(): - raise IOError("Error opening video stream or file, " - "--video_path whether existing: {}" - 
" or camera whether working".format( - args.video_path)) - return - - if is_video_bg: - cap_bg = cv2.VideoCapture(args.background_video_path) - frames_bg = cap_bg.get(cv2.CAP_PROP_FRAME_COUNT) - current_frame_bg = 1 - else: - img_bg = cv2.imread(args.background_image_path) - while cap_video.isOpened(): - ret, frame = cap_video.read() - if ret: - score_map, im_info = predict(frame, model, test_transforms) - cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) - score_map = 255 * score_map[:, :, 1] - optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \ - disflow, is_init) - prev_gray = cur_gray.copy() - prev_cfd = optflow_map.copy() - is_init = False - optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) - optflow_map = threshold_mask( - optflow_map, thresh_bg=0.2, thresh_fg=0.8) - score_map = recover(optflow_map, im_info) - - #循环读取背景帧 - if is_video_bg: - ret_bg, frame_bg = cap_bg.read() - if ret_bg: - if current_frame_bg == frames_bg: - current_frame_bg = 1 - cap_bg.set(cv2.CAP_PROP_POS_FRAMES, 0) - else: - break - current_frame_bg += 1 - comb = bg_replace(score_map, frame, frame_bg) - else: - comb = bg_replace(score_map, frame, img_bg) - cv2.imshow('HumanSegmentation', comb) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - else: - break - if is_video_bg: - cap_bg.release() - cap_video.release() - - -if __name__ == "__main__": - args = parse_args() - infer(args) diff --git a/legacy/contrib/HumanSeg/data/background.jpg b/legacy/contrib/HumanSeg/data/background.jpg deleted file mode 100644 index 792e43c235..0000000000 Binary files a/legacy/contrib/HumanSeg/data/background.jpg and /dev/null differ diff --git a/legacy/contrib/HumanSeg/data/download_data.py b/legacy/contrib/HumanSeg/data/download_data.py deleted file mode 100644 index a788df0f7f..0000000000 --- a/legacy/contrib/HumanSeg/data/download_data.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "../../../", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_data(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - url = "https://paddleseg.bj.bcebos.com/humanseg/data/video_test.zip" - download_file_and_uncompress( - url=url, - savepath=savepath, - extrapath=extrapath, - extraname='video_test.mp4') - - -if __name__ == "__main__": - download_data(LOCAL_PATH, LOCAL_PATH) - print("Data download finish!") diff --git a/legacy/contrib/HumanSeg/data/human_image.jpg b/legacy/contrib/HumanSeg/data/human_image.jpg deleted file mode 100644 index d1cfb43e21..0000000000 Binary files a/legacy/contrib/HumanSeg/data/human_image.jpg and /dev/null differ diff --git a/legacy/contrib/HumanSeg/datasets/dataset.py b/legacy/contrib/HumanSeg/datasets/dataset.py deleted file mode 100644 index 07bfdebd4a..0000000000 --- a/legacy/contrib/HumanSeg/datasets/dataset.py +++ /dev/null @@ -1,275 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
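The download_data.py helper removed above fetched the mini_supervisely and video_test archives through the repository-internal download_file_and_uncompress utility. A rough standalone sketch of the same download-and-unpack step using only the standard library; the function name and target directory are illustrative:

```python
import os
import urllib.request
import zipfile


def fetch_and_extract(url, save_dir="."):
    """Download a zip archive and unpack it into save_dir (sketch only)."""
    os.makedirs(save_dir, exist_ok=True)
    archive = os.path.join(save_dir, os.path.basename(url))
    urllib.request.urlretrieve(url, archive)
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(save_dir)


if __name__ == "__main__":
    # URL as listed in the removed script; video_test.zip was fetched similarly.
    fetch_and_extract(
        "https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip")
```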
- -import os.path as osp -from threading import Thread -import multiprocessing -import collections -import numpy as np -import six -import sys -import copy -import random -import platform -import chardet -import utils.logging as logging - - -class EndSignal(): - pass - - -def is_pic(img_name): - valid_suffix = ['JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png'] - suffix = img_name.split('.')[-1] - if suffix not in valid_suffix: - return False - return True - - -def is_valid(sample): - if sample is None: - return False - if isinstance(sample, tuple): - for s in sample: - if s is None: - return False - elif isinstance(s, np.ndarray) and s.size == 0: - return False - elif isinstance(s, collections.Sequence) and len(s) == 0: - return False - return True - - -def get_encoding(path): - f = open(path, 'rb') - data = f.read() - file_encoding = chardet.detect(data).get('encoding') - return file_encoding - - -def multithread_reader(mapper, - reader, - num_workers=4, - buffer_size=1024, - batch_size=8, - drop_last=True): - from queue import Queue - end = EndSignal() - - # define a worker to read samples from reader to in_queue - def read_worker(reader, in_queue): - for i in reader(): - in_queue.put(i) - in_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue - def handle_worker(in_queue, out_queue, mapper): - sample = in_queue.get() - while not isinstance(sample, EndSignal): - if len(sample) == 2: - r = mapper(sample[0], sample[1]) - elif len(sample) == 3: - r = mapper(sample[0], sample[1], sample[2]) - else: - raise Exception('The sample\'s length must be 2 or 3.') - if is_valid(r): - out_queue.put(r) - sample = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - def xreader(): - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - # start a read worker in a thread - target = read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # start several handle_workers - target = handle_worker - args = (in_queue, out_queue, mapper) - workers = [] - for i in range(num_workers): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - - batch_data = [] - sample = out_queue.get() - while not isinstance(sample, EndSignal): - batch_data.append(sample) - if len(batch_data) == batch_size: - yield batch_data - batch_data = [] - sample = out_queue.get() - finish = 1 - while finish < num_workers: - sample = out_queue.get() - if isinstance(sample, EndSignal): - finish += 1 - else: - batch_data.append(sample) - if len(batch_data) == batch_size: - yield batch_data - batch_data = [] - if not drop_last and len(batch_data) != 0: - yield batch_data - batch_data = [] - - return xreader - - -def multiprocess_reader(mapper, - reader, - num_workers=4, - buffer_size=1024, - batch_size=8, - drop_last=True): - from .shared_queue import SharedQueue as Queue - - def _read_into_queue(samples, mapper, queue): - end = EndSignal() - try: - for sample in samples: - if sample is None: - raise ValueError("sample has None") - if len(sample) == 2: - result = mapper(sample[0], sample[1]) - elif len(sample) == 3: - result = mapper(sample[0], sample[1], sample[2]) - else: - raise Exception('The sample\'s length must be 2 or 3.') - if is_valid(result): - queue.put(result) - queue.put(end) - except: - queue.put("") - six.reraise(*sys.exc_info()) - - def queue_reader(): - queue = Queue(buffer_size, memsize=3 * 1024**3) - total_samples = [[] for i 
in range(num_workers)] - for i, sample in enumerate(reader()): - index = i % num_workers - total_samples[index].append(sample) - for i in range(num_workers): - p = multiprocessing.Process( - target=_read_into_queue, args=(total_samples[i], mapper, queue)) - p.start() - - finish_num = 0 - batch_data = list() - while finish_num < num_workers: - sample = queue.get() - if isinstance(sample, EndSignal): - finish_num += 1 - elif sample == "": - raise ValueError("multiprocess reader raises an exception") - else: - batch_data.append(sample) - if len(batch_data) == batch_size: - yield batch_data - batch_data = [] - if len(batch_data) != 0 and not drop_last: - yield batch_data - batch_data = [] - - return queue_reader - - -class Dataset: - def __init__(self, - data_dir, - file_list, - label_list=None, - transforms=None, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=False): - if num_workers == 'auto': - import multiprocessing as mp - num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8 - if transforms is None: - raise Exception("transform should be defined.") - self.transforms = transforms - self.num_workers = num_workers - self.buffer_size = buffer_size - self.parallel_method = parallel_method - self.shuffle = shuffle - - self.file_list = list() - self.labels = list() - self._epoch = 0 - - if label_list is not None: - with open(label_list, encoding=get_encoding(label_list)) as f: - for line in f: - item = line.strip() - self.labels.append(item) - - with open(file_list, encoding=get_encoding(file_list)) as f: - for line in f: - items = line.strip().split() - if not is_pic(items[0]): - continue - full_path_im = osp.join(data_dir, items[0]) - full_path_label = osp.join(data_dir, items[1]) - if not osp.exists(full_path_im): - raise IOError( - 'The image file {} is not exist!'.format(full_path_im)) - if not osp.exists(full_path_label): - raise IOError('The image file {} is not exist!'.format( - full_path_label)) - self.file_list.append([full_path_im, full_path_label]) - self.num_samples = len(self.file_list) - logging.info("{} samples in file {}".format( - len(self.file_list), file_list)) - - def iterator(self): - self._epoch += 1 - self._pos = 0 - files = copy.deepcopy(self.file_list) - if self.shuffle: - random.shuffle(files) - files = files[:self.num_samples] - self.num_samples = len(files) - for f in files: - label_path = f[1] - sample = [f[0], None, label_path] - yield sample - - def generator(self, batch_size=1, drop_last=True): - self.batch_size = batch_size - parallel_reader = multithread_reader - if self.parallel_method == "process": - if platform.platform().startswith("Windows"): - logging.debug( - "multiprocess_reader is not supported in Windows platform, force to use multithread_reader." - ) - else: - parallel_reader = multiprocess_reader - return parallel_reader( - self.transforms, - self.iterator, - num_workers=self.num_workers, - buffer_size=self.buffer_size, - batch_size=batch_size, - drop_last=drop_last) diff --git a/legacy/contrib/HumanSeg/datasets/shared_queue/__init__.py b/legacy/contrib/HumanSeg/datasets/shared_queue/__init__.py deleted file mode 100644 index 1662f739d4..0000000000 --- a/legacy/contrib/HumanSeg/datasets/shared_queue/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
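The dataset.py reader removed above feeds samples through either a multithread or multiprocess pipeline; note that generator() returns a reader function which must itself be called to start the workers. A usage sketch under the module layout that existed before removal; paths, shape and batch size are illustrative:

```python
import transforms as T                 # legacy HumanSeg transforms module
from datasets.dataset import Dataset   # layout prior to removal

train_transforms = T.Compose([T.Resize([192, 192]), T.Normalize()])
train_dataset = Dataset(
    data_dir="data/mini_supervisely",
    file_list="data/mini_supervisely/train.txt",
    label_list="data/mini_supervisely/labels.txt",
    transforms=train_transforms,
    parallel_method="thread",  # "process" falls back to threads on Windows
    shuffle=True)

# generator() returns the reader; calling it starts the worker threads.
reader = train_dataset.generator(batch_size=8, drop_last=True)
for batch in reader():
    pass  # each batch is a list of transform outputs for one image/label pair
```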
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -__all__ = ['SharedBuffer', 'SharedMemoryMgr', 'SharedQueue'] - -from .sharedmemory import SharedBuffer -from .sharedmemory import SharedMemoryMgr -from .sharedmemory import SharedMemoryError -from .queue import SharedQueue diff --git a/legacy/contrib/HumanSeg/datasets/shared_queue/queue.py b/legacy/contrib/HumanSeg/datasets/shared_queue/queue.py deleted file mode 100644 index 7a67f98de7..0000000000 --- a/legacy/contrib/HumanSeg/datasets/shared_queue/queue.py +++ /dev/null @@ -1,103 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -if six.PY3: - import pickle - from io import BytesIO as StringIO -else: - import cPickle as pickle - from cStringIO import StringIO - -import logging -import traceback -import multiprocessing as mp -from multiprocessing.queues import Queue -from .sharedmemory import SharedMemoryMgr - -logger = logging.getLogger(__name__) - - -class SharedQueueError(ValueError): - """ SharedQueueError - """ - pass - - -class SharedQueue(Queue): - """ a Queue based on shared memory to communicate data between Process, - and it's interface is compatible with 'multiprocessing.queues.Queue' - """ - - def __init__(self, maxsize=0, mem_mgr=None, memsize=None, pagesize=None): - """ init - """ - if six.PY3: - super(SharedQueue, self).__init__(maxsize, ctx=mp.get_context()) - else: - super(SharedQueue, self).__init__(maxsize) - - if mem_mgr is not None: - self._shared_mem = mem_mgr - else: - self._shared_mem = SharedMemoryMgr( - capacity=memsize, pagesize=pagesize) - - def put(self, obj, **kwargs): - """ put an object to this queue - """ - obj = pickle.dumps(obj, -1) - buff = None - try: - buff = self._shared_mem.malloc(len(obj)) - buff.put(obj) - super(SharedQueue, self).put(buff, **kwargs) - except Exception as e: - stack_info = traceback.format_exc() - err_msg = 'failed to put a element to SharedQueue '\ - 'with stack info[%s]' % (stack_info) - logger.warn(err_msg) - - if buff is not None: - buff.free() - raise e - - def get(self, **kwargs): - """ get an object from this queue - """ - buff = None - try: - buff = super(SharedQueue, self).get(**kwargs) - data = buff.get() - return pickle.load(StringIO(data)) - except Exception as e: - stack_info = 
traceback.format_exc() - err_msg = 'failed to get element from SharedQueue '\ - 'with stack info[%s]' % (stack_info) - logger.warn(err_msg) - raise e - finally: - if buff is not None: - buff.free() - - def release(self): - self._shared_mem.release() - self._shared_mem = None diff --git a/legacy/contrib/HumanSeg/datasets/shared_queue/sharedmemory.py b/legacy/contrib/HumanSeg/datasets/shared_queue/sharedmemory.py deleted file mode 100644 index 8df1375219..0000000000 --- a/legacy/contrib/HumanSeg/datasets/shared_queue/sharedmemory.py +++ /dev/null @@ -1,532 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import os -import time -import math -import struct -import sys -import six - -if six.PY3: - import pickle -else: - import cPickle as pickle - -import json -import uuid -import random -import numpy as np -import weakref -import logging -from multiprocessing import Lock -from multiprocessing import RawArray - -logger = logging.getLogger(__name__) - - -class SharedMemoryError(ValueError): - """ SharedMemoryError - """ - pass - - -class SharedBufferError(SharedMemoryError): - """ SharedBufferError - """ - pass - - -class MemoryFullError(SharedMemoryError): - """ MemoryFullError - """ - - def __init__(self, errmsg=''): - super(MemoryFullError, self).__init__() - self.errmsg = errmsg - - -def memcopy(dst, src, offset=0, length=None): - """ copy data from 'src' to 'dst' in bytes - """ - length = length if length is not None else len(src) - assert type(dst) == np.ndarray, 'invalid type for "dst" in memcopy' - if type(src) is not np.ndarray: - if type(src) is str and six.PY3: - src = src.encode() - src = np.frombuffer(src, dtype='uint8', count=len(src)) - - dst[:] = src[offset:offset + length] - - -class SharedBuffer(object): - """ Buffer allocated from SharedMemoryMgr, and it stores data on shared memory - - note that: - every instance of this should be freed explicitely by calling 'self.free' - """ - - def __init__(self, owner, capacity, pos, size=0, alloc_status=''): - """ Init - - Args: - owner (str): manager to own this buffer - capacity (int): capacity in bytes for this buffer - pos (int): page position in shared memory - size (int): bytes already used - alloc_status (str): debug info about allocator when allocate this - """ - self._owner = owner - self._cap = capacity - self._pos = pos - self._size = size - self._alloc_status = alloc_status - assert self._pos >= 0 and self._cap > 0, \ - "invalid params[%d:%d] to construct SharedBuffer" \ - % (self._pos, self._cap) - - def owner(self): - """ get owner - """ - return SharedMemoryMgr.get_mgr(self._owner) - - def put(self, data, override=False): - """ put data to this buffer - - Args: - data (str): data to be stored in this buffer - - Returns: - None - - Raises: - SharedMemoryError when not enough space in this 
buffer - """ - assert type(data) in [str, bytes], \ - 'invalid type[%s] for SharedBuffer::put' % (str(type(data))) - if self._size > 0 and not override: - raise SharedBufferError('already has already been setted before') - - if self.capacity() < len(data): - raise SharedBufferError('data[%d] is larger than size of buffer[%s]'\ - % (len(data), str(self))) - - self.owner().put_data(self, data) - self._size = len(data) - - def get(self, offset=0, size=None, no_copy=True): - """ get the data stored this buffer - - Args: - offset (int): position for the start point to 'get' - size (int): size to get - - Returns: - data (np.ndarray('uint8')): user's data in numpy - which is passed in by 'put' - None: if no data stored in - """ - offset = offset if offset >= 0 else self._size + offset - if self._size <= 0: - return None - - size = self._size if size is None else size - assert offset + size <= self._cap, 'invalid offset[%d] '\ - 'or size[%d] for capacity[%d]' % (offset, size, self._cap) - return self.owner().get_data(self, offset, size, no_copy=no_copy) - - def size(self): - """ bytes of used memory - """ - return self._size - - def resize(self, size): - """ resize the used memory to 'size', should not be greater than capacity - """ - assert size >= 0 and size <= self._cap, \ - "invalid size[%d] for resize" % (size) - - self._size = size - - def capacity(self): - """ size of allocated memory - """ - return self._cap - - def __str__(self): - """ human readable format - """ - return "SharedBuffer(owner:%s, pos:%d, size:%d, "\ - "capacity:%d, alloc_status:[%s], pid:%d)" \ - % (str(self._owner), self._pos, self._size, \ - self._cap, self._alloc_status, os.getpid()) - - def free(self): - """ free this buffer to it's owner - """ - if self._owner is not None: - self.owner().free(self) - self._owner = None - self._cap = 0 - self._pos = -1 - self._size = 0 - return True - else: - return False - - -class PageAllocator(object): - """ allocator used to malloc and free shared memory which - is split into pages - """ - s_allocator_header = 12 - - def __init__(self, base, total_pages, page_size): - """ init - """ - self._magic_num = 1234321000 + random.randint(100, 999) - self._base = base - self._total_pages = total_pages - self._page_size = page_size - - header_pages = int( - math.ceil((total_pages + self.s_allocator_header) / page_size)) - - self._header_pages = header_pages - self._free_pages = total_pages - header_pages - self._header_size = self._header_pages * page_size - self._reset() - - def _dump_alloc_info(self, fname): - hpages, tpages, pos, used = self.header() - - start = self.s_allocator_header - end = start + self._page_size * hpages - alloc_flags = self._base[start:end].tostring() - info = { - 'magic_num': self._magic_num, - 'header_pages': hpages, - 'total_pages': tpages, - 'pos': pos, - 'used': used - } - info['alloc_flags'] = alloc_flags - fname = fname + '.' 
+ str(uuid.uuid4())[:6] - with open(fname, 'wb') as f: - f.write(pickle.dumps(info, -1)) - logger.warn('dump alloc info to file[%s]' % (fname)) - - def _reset(self): - alloc_page_pos = self._header_pages - used_pages = self._header_pages - header_info = struct.pack( - str('III'), self._magic_num, alloc_page_pos, used_pages) - assert len(header_info) == self.s_allocator_header, \ - 'invalid size of header_info' - - memcopy(self._base[0:self.s_allocator_header], header_info) - self.set_page_status(0, self._header_pages, '1') - self.set_page_status(self._header_pages, self._free_pages, '0') - - def header(self): - """ get header info of this allocator - """ - header_str = self._base[0:self.s_allocator_header].tostring() - magic, pos, used = struct.unpack(str('III'), header_str) - - assert magic == self._magic_num, \ - 'invalid header magic[%d] in shared memory' % (magic) - return self._header_pages, self._total_pages, pos, used - - def empty(self): - """ are all allocatable pages available - """ - header_pages, pages, pos, used = self.header() - return header_pages == used - - def full(self): - """ are all allocatable pages used - """ - header_pages, pages, pos, used = self.header() - return header_pages + used == pages - - def __str__(self): - header_pages, pages, pos, used = self.header() - desc = '{page_info[magic:%d,total:%d,used:%d,header:%d,alloc_pos:%d,pagesize:%d]}' \ - % (self._magic_num, pages, used, header_pages, pos, self._page_size) - return 'PageAllocator:%s' % (desc) - - def set_alloc_info(self, alloc_pos, used_pages): - """ set allocating position to new value - """ - memcopy(self._base[4:12], struct.pack(str('II'), alloc_pos, used_pages)) - - def set_page_status(self, start, page_num, status): - """ set pages from 'start' to 'end' with new same status 'status' - """ - assert status in ['0', '1'], 'invalid status[%s] for page status '\ - 'in allocator[%s]' % (status, str(self)) - start += self.s_allocator_header - end = start + page_num - assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\ - 'in allocator[%s]' % (end, str(self)) - memcopy(self._base[start:end], str(status * page_num)) - - def get_page_status(self, start, page_num, ret_flag=False): - start += self.s_allocator_header - end = start + page_num - assert start >= 0 and end <= self._header_size, 'invalid end[%d] of pages '\ - 'in allocator[%s]' % (end, str(self)) - status = self._base[start:end].tostring().decode() - if ret_flag: - return status - - zero_num = status.count('0') - if zero_num == 0: - return (page_num, 1) - else: - return (zero_num, 0) - - def malloc_page(self, page_num): - header_pages, pages, pos, used = self.header() - end = pos + page_num - if end > pages: - pos = self._header_pages - end = pos + page_num - - start_pos = pos - flags = '' - while True: - # maybe flags already has some '0' pages, - # so just check 'page_num - len(flags)' pages - flags = self.get_page_status(pos, page_num, ret_flag=True) - - if flags.count('0') == page_num: - break - - # not found enough pages, so shift to next few pages - free_pos = flags.rfind('1') + 1 - pos += free_pos - end = pos + page_num - if end > pages: - pos = self._header_pages - end = pos + page_num - flags = '' - - # not found available pages after scan all pages - if pos <= start_pos and end >= start_pos: - logger.debug('not found available pages after scan all pages') - break - - page_status = (flags.count('0'), 0) - if page_status != (page_num, 0): - free_pages = self._total_pages - used - if free_pages == 0: - err_msg = 'all 
pages have been used:%s' % (str(self)) - else: - err_msg = 'not found available pages with page_status[%s] '\ - 'and %d free pages' % (str(page_status), free_pages) - err_msg = 'failed to malloc %d pages at pos[%d] for reason[%s] and allocator status[%s]' \ - % (page_num, pos, err_msg, str(self)) - raise MemoryFullError(err_msg) - - self.set_page_status(pos, page_num, '1') - used += page_num - self.set_alloc_info(end, used) - return pos - - def free_page(self, start, page_num): - """ free 'page_num' pages start from 'start' - """ - page_status = self.get_page_status(start, page_num) - assert page_status == (page_num, 1), \ - 'invalid status[%s] when free [%d, %d]' \ - % (str(page_status), start, page_num) - self.set_page_status(start, page_num, '0') - _, _, pos, used = self.header() - used -= page_num - self.set_alloc_info(pos, used) - - -DEFAULT_SHARED_MEMORY_SIZE = 1024 * 1024 * 1024 - - -class SharedMemoryMgr(object): - """ manage a continouse block of memory, provide - 'malloc' to allocate new buffer, and 'free' to free buffer - """ - s_memory_mgrs = weakref.WeakValueDictionary() - s_mgr_num = 0 - s_log_statis = False - - @classmethod - def get_mgr(cls, id): - """ get a SharedMemoryMgr with size of 'capacity' - """ - assert id in cls.s_memory_mgrs, 'invalid id[%s] for memory managers' % ( - id) - return cls.s_memory_mgrs[id] - - def __init__(self, capacity=None, pagesize=None): - """ init - """ - logger.debug('create SharedMemoryMgr') - - pagesize = 64 * 1024 if pagesize is None else pagesize - assert type(pagesize) is int, "invalid type of pagesize[%s]" \ - % (str(pagesize)) - - capacity = DEFAULT_SHARED_MEMORY_SIZE if capacity is None else capacity - assert type(capacity) is int, "invalid type of capacity[%s]" \ - % (str(capacity)) - - assert capacity > 0, '"size of shared memory should be greater than 0' - self._released = False - self._cap = capacity - self._page_size = pagesize - - assert self._cap % self._page_size == 0, \ - "capacity[%d] and pagesize[%d] are not consistent" \ - % (self._cap, self._page_size) - self._total_pages = self._cap // self._page_size - - self._pid = os.getpid() - SharedMemoryMgr.s_mgr_num += 1 - self._id = self._pid * 100 + SharedMemoryMgr.s_mgr_num - SharedMemoryMgr.s_memory_mgrs[self._id] = self - self._locker = Lock() - self._setup() - - def _setup(self): - self._shared_mem = RawArray('c', self._cap) - self._base = np.frombuffer( - self._shared_mem, dtype='uint8', count=self._cap) - self._locker.acquire() - try: - self._allocator = PageAllocator(self._base, self._total_pages, - self._page_size) - finally: - self._locker.release() - - def malloc(self, size, wait=True): - """ malloc a new SharedBuffer - - Args: - size (int): buffer size to be malloc - wait (bool): whether to wait when no enough memory - - Returns: - SharedBuffer - - Raises: - SharedMemoryError when not found available memory - """ - page_num = int(math.ceil(size / self._page_size)) - size = page_num * self._page_size - - start = None - ct = 0 - errmsg = '' - while True: - self._locker.acquire() - try: - start = self._allocator.malloc_page(page_num) - alloc_status = str(self._allocator) - except MemoryFullError as e: - start = None - errmsg = e.errmsg - if not wait: - raise e - finally: - self._locker.release() - - if start is None: - time.sleep(0.1) - if ct % 100 == 0: - logger.warn('not enough space for reason[%s]' % (errmsg)) - - ct += 1 - else: - break - - return SharedBuffer(self._id, size, start, alloc_status=alloc_status) - - def free(self, shared_buf): - """ free a SharedBuffer - 
- Args: - shared_buf (SharedBuffer): buffer to be freed - - Returns: - None - - Raises: - SharedMemoryError when failed to release this buffer - """ - assert shared_buf._owner == self._id, "invalid shared_buf[%s] "\ - "for it's not allocated from me[%s]" % (str(shared_buf), str(self)) - cap = shared_buf.capacity() - start_page = shared_buf._pos - page_num = cap // self._page_size - - #maybe we don't need this lock here - self._locker.acquire() - try: - self._allocator.free_page(start_page, page_num) - finally: - self._locker.release() - - def put_data(self, shared_buf, data): - """ fill 'data' into 'shared_buf' - """ - assert len(data) <= shared_buf.capacity(), 'too large data[%d] '\ - 'for this buffer[%s]' % (len(data), str(shared_buf)) - start = shared_buf._pos * self._page_size - end = start + len(data) - assert start >= 0 and end <= self._cap, "invalid start "\ - "position[%d] when put data to buff:%s" % (start, str(shared_buf)) - self._base[start:end] = np.frombuffer(data, 'uint8', len(data)) - - def get_data(self, shared_buf, offset, size, no_copy=True): - """ extract 'data' from 'shared_buf' in range [offset, offset + size) - """ - start = shared_buf._pos * self._page_size - start += offset - if no_copy: - return self._base[start:start + size] - else: - return self._base[start:start + size].tostring() - - def __str__(self): - return 'SharedMemoryMgr:{id:%d, %s}' % (self._id, str(self._allocator)) - - def __del__(self): - if SharedMemoryMgr.s_log_statis: - logger.info('destroy [%s]' % (self)) - - if not self._released and not self._allocator.empty(): - logger.debug( - 'not empty when delete this SharedMemoryMgr[%s]' % (self)) - else: - self._released = True - - if self._id in SharedMemoryMgr.s_memory_mgrs: - del SharedMemoryMgr.s_memory_mgrs[self._id] - SharedMemoryMgr.s_mgr_num -= 1 diff --git a/legacy/contrib/HumanSeg/export.py b/legacy/contrib/HumanSeg/export.py deleted file mode 100644 index c6444bb298..0000000000 --- a/legacy/contrib/HumanSeg/export.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import models -import argparse - - -def parse_args(): - parser = argparse.ArgumentParser(description='Export model') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for exporting', - type=str) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the export model', - type=str, - default='./output/export') - return parser.parse_args() - - -def export(args): - model = models.load_model(args.model_dir) - model.export_inference_model(args.save_dir) - - -if __name__ == '__main__': - args = parse_args() - export(args) diff --git a/legacy/contrib/HumanSeg/infer.py b/legacy/contrib/HumanSeg/infer.py deleted file mode 100644 index dbb3038236..0000000000 --- a/legacy/contrib/HumanSeg/infer.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
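The shared_queue package removed above (queue.py plus sharedmemory.py) implements a page-based shared-memory allocator and a pickling queue on top of it, which the multiprocess reader used to move samples between worker processes. A minimal API sketch based on the removed sources; sizes are illustrative and every buffer must be freed explicitly:

```python
from datasets.shared_queue import SharedMemoryMgr, SharedQueue  # layout prior to removal

# A 64 MB arena split into 64 KB pages (capacity must be a multiple of pagesize).
mgr = SharedMemoryMgr(capacity=64 * 1024 * 1024, pagesize=64 * 1024)

buf = mgr.malloc(1024)             # SharedBuffer carved out of the arena
buf.put(b"sample bytes")
payload = buf.get(no_copy=False)   # bytes copy; the default returns a uint8 view
buf.free()                         # buffers are returned to their owner explicitly

# SharedQueue keeps the multiprocessing.Queue interface but stores payloads in
# shared memory, so only small buffer handles travel through the pipe itself.
queue = SharedQueue(maxsize=32, memsize=64 * 1024 * 1024)
queue.put({"image": "a.jpg", "label": "a.png"})
item = queue.get()
queue.release()
```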
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import os.path as osp -import cv2 -import numpy as np -import tqdm - -import utils -import models -import transforms - - -def parse_args(): - parser = argparse.ArgumentParser( - description='HumanSeg inference and visualization') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for inference', - type=str) - parser.add_argument( - '--data_dir', - dest='data_dir', - help='The root directory of dataset', - type=str) - parser.add_argument( - '--test_list', - dest='test_list', - help='Test list file of dataset', - type=str) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the inference results', - type=str, - default='./output/result') - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - return parser.parse_args() - - -def mkdir(path): - sub_dir = osp.dirname(path) - if not osp.exists(sub_dir): - os.makedirs(sub_dir) - - -def infer(args): - test_transforms = transforms.Compose( - [transforms.Resize(args.image_shape), - transforms.Normalize()]) - model = models.load_model(args.model_dir) - added_saveed_path = osp.join(args.save_dir, 'added') - mat_saved_path = osp.join(args.save_dir, 'mat') - scoremap_saved_path = osp.join(args.save_dir, 'scoremap') - - with open(args.test_list, 'r') as f: - files = f.readlines() - - for file in tqdm.tqdm(files): - file = file.strip() - im_file = osp.join(args.data_dir, file) - im = cv2.imread(im_file) - result = model.predict(im, transforms=test_transforms) - - # save added image - added_image = utils.visualize(im_file, result, weight=0.6) - added_image_file = osp.join(added_saveed_path, file) - mkdir(added_image_file) - cv2.imwrite(added_image_file, added_image) - - # save score map - score_map = result['score_map'][:, :, 1] - score_map = (score_map * 255).astype(np.uint8) - score_map_file = osp.join(scoremap_saved_path, file) - mkdir(score_map_file) - cv2.imwrite(score_map_file, score_map) - - # save mat image - score_map = np.expand_dims(score_map, axis=-1) - mat_image = np.concatenate([im, score_map], axis=2) - mat_file = osp.join(mat_saved_path, file) - ext = osp.splitext(mat_file)[-1] - mat_file = mat_file.replace(ext, '.png') - mkdir(mat_file) - cv2.imwrite(mat_file, mat_image) - - -if __name__ == '__main__': - args = parse_args() - infer(args) diff --git a/legacy/contrib/HumanSeg/models/__init__.py b/legacy/contrib/HumanSeg/models/__init__.py deleted file mode 100644 index f2fac79c41..0000000000 --- a/legacy/contrib/HumanSeg/models/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .humanseg import HumanSegMobile -from .humanseg import HumanSegServer -from .humanseg import HumanSegLite -from .load_model import load_model diff --git a/legacy/contrib/HumanSeg/models/humanseg.py b/legacy/contrib/HumanSeg/models/humanseg.py deleted file mode 100644 index 1790e31e36..0000000000 --- a/legacy/contrib/HumanSeg/models/humanseg.py +++ /dev/null @@ -1,919 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -import paddle.fluid as fluid -import os -from os import path as osp -import numpy as np -from collections import OrderedDict -import copy -import math -import time -import tqdm -import cv2 -import yaml -import shutil -import paddleslim as slim -import paddle - -import utils -import utils.logging as logging -from utils import seconds_to_hms -from utils import ConfusionMatrix -from utils import get_environ_info -from nets import DeepLabv3p, ShuffleSeg, HRNet -import transforms as T - - -def save_infer_program(test_program, ckpt_dir): - _test_program = test_program.clone() - _test_program.desc.flush() - _test_program.desc._set_version() - paddle.fluid.core.save_op_compatible_info(_test_program.desc) - with open(os.path.join(ckpt_dir, 'model') + ".pdmodel", "wb") as f: - f.write(_test_program.desc.serialize_to_string()) - - -def dict2str(dict_input): - out = '' - for k, v in dict_input.items(): - try: - v = round(float(v), 6) - except: - pass - out = out + '{}={}, '.format(k, v) - return out.strip(', ') - - -class SegModel(object): - # DeepLab mobilenet - def __init__(self, - num_classes=2, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - self.init_params = locals() - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. 
- format(type(class_weight))) - - self.num_classes = num_classes - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.sync_bn = sync_bn - - self.labels = None - self.env_info = get_environ_info() - if self.env_info['place'] == 'cpu': - self.places = fluid.cpu_places() - else: - self.places = fluid.cuda_places() - self.exe = fluid.Executor(self.places[0]) - self.train_prog = None - self.test_prog = None - self.parallel_train_prog = None - self.train_inputs = None - self.test_inputs = None - self.train_outputs = None - self.test_outputs = None - self.train_data_loader = None - self.eval_metrics = None - # 当前模型状态 - self.status = 'Normal' - - def _get_single_card_bs(self, batch_size): - if batch_size % len(self.places) == 0: - return int(batch_size // len(self.places)) - else: - raise Exception("Please support correct batch_size, \ - which can be divided by available cards({}) in {}". - format(self.env_info['num'], - self.env_info['place'])) - - def build_net(self, mode='train'): - """应根据不同的情况进行构建""" - pass - - def build_program(self): - # build training network - self.train_inputs, self.train_outputs = self.build_net(mode='train') - self.train_prog = fluid.default_main_program() - startup_prog = fluid.default_startup_program() - - # build prediction network - self.test_prog = fluid.Program() - with fluid.program_guard(self.test_prog, startup_prog): - with fluid.unique_name.guard(): - self.test_inputs, self.test_outputs = self.build_net( - mode='test') - self.test_prog = self.test_prog.clone(for_test=True) - - def arrange_transform(self, transforms, mode='train'): - arrange_transform = T.ArrangeSegmenter - if type(transforms.transforms[-1]).__name__.startswith('Arrange'): - transforms.transforms[-1] = arrange_transform(mode=mode) - else: - transforms.transforms.append(arrange_transform(mode=mode)) - - def build_train_data_loader(self, dataset, batch_size): - # init data_loader - if self.train_data_loader is None: - self.train_data_loader = fluid.io.DataLoader.from_generator( - feed_list=list(self.train_inputs.values()), - capacity=64, - use_double_buffer=True, - iterable=True) - batch_size_each_gpu = self._get_single_card_bs(batch_size) - self.train_data_loader.set_sample_list_generator( - dataset.generator(batch_size=batch_size_each_gpu), - places=self.places) - - def net_initialize(self, - startup_prog=None, - pretrained_weights=None, - resume_weights=None): - if startup_prog is None: - startup_prog = fluid.default_startup_program() - self.exe.run(startup_prog) - if resume_weights is not None: - logging.info("Resume weights from {}".format(resume_weights)) - if not osp.exists(resume_weights): - raise Exception("Path {} not exists.".format(resume_weights)) - fluid.load(self.train_prog, osp.join(resume_weights, 'model'), - self.exe) - # Check is path ended by path spearator - if resume_weights[-1] == os.sep: - resume_weights = resume_weights[0:-1] - epoch_name = osp.basename(resume_weights) - # If resume weights is end of digit, restore epoch status - epoch = epoch_name.split('_')[-1] - if epoch.isdigit(): - self.begin_epoch = int(epoch) - else: - raise ValueError("Resume model path is not valid!") - logging.info("Model checkpoint loaded successfully!") - - elif pretrained_weights is not None: - logging.info( - "Load pretrain weights from {}.".format(pretrained_weights)) - utils.load_pretrained_weights(self.exe, self.train_prog, - pretrained_weights) - - def get_model_info(self): - # 存储相应的信息到yml文件 - 
info = dict() - info['Model'] = self.__class__.__name__ - if 'self' in self.init_params: - del self.init_params['self'] - if '__class__' in self.init_params: - del self.init_params['__class__'] - info['_init_params'] = self.init_params - - info['_Attributes'] = dict() - info['_Attributes']['num_classes'] = self.num_classes - info['_Attributes']['labels'] = self.labels - try: - info['_Attributes']['eval_metric'] = dict() - for k, v in self.eval_metrics.items(): - if isinstance(v, np.ndarray): - if v.size > 1: - v = [float(i) for i in v] - else: - v = float(v) - info['_Attributes']['eval_metric'][k] = v - except: - pass - - if hasattr(self, 'test_transforms'): - if self.test_transforms is not None: - info['test_transforms'] = list() - for op in self.test_transforms.transforms: - name = op.__class__.__name__ - attr = op.__dict__ - info['test_transforms'].append({name: attr}) - - if hasattr(self, 'train_transforms'): - if self.train_transforms is not None: - info['train_transforms'] = list() - for op in self.train_transforms.transforms: - name = op.__class__.__name__ - attr = op.__dict__ - info['train_transforms'].append({name: attr}) - - if hasattr(self, 'train_init'): - if 'self' in self.train_init: - del self.train_init['self'] - if 'train_dataset' in self.train_init: - del self.train_init['train_dataset'] - if 'eval_dataset' in self.train_init: - del self.train_init['eval_dataset'] - if 'optimizer' in self.train_init: - del self.train_init['optimizer'] - info['train_init'] = self.train_init - return info - - def save_model(self, save_dir): - if not osp.isdir(save_dir): - if osp.exists(save_dir): - os.remove(save_dir) - os.makedirs(save_dir) - model_info = self.get_model_info() - - if self.status == 'Normal': - fluid.save(self.train_prog, osp.join(save_dir, 'model')) - save_infer_program(self.test_prog, save_dir) - model_info['status'] = 'Normal' - elif self.status == 'Quant': - fluid.save(self.test_prog, osp.join(save_dir, 'model')) - model_info['status'] = 'QuantOnline' - - with open( - osp.join(save_dir, 'model.yml'), encoding='utf-8', - mode='w') as f: - yaml.dump(model_info, f) - - # The flag of model for saving successfully - open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model saved in {}.".format(save_dir)) - - def export_inference_model(self, save_dir): - test_input_names = [var.name for var in list(self.test_inputs.values())] - test_outputs = list(self.test_outputs.values()) - fluid.io.save_inference_model( - dirname=save_dir, - executor=self.exe, - params_filename='__params__', - feeded_var_names=test_input_names, - target_vars=test_outputs, - main_program=self.test_prog) - model_info = self.get_model_info() - model_info['status'] = 'Infer' - - # Save input and output descrition of model - model_info['_ModelInputsOutputs'] = dict() - model_info['_ModelInputsOutputs']['test_inputs'] = [ - [k, v.name] for k, v in self.test_inputs.items() - ] - model_info['_ModelInputsOutputs']['test_outputs'] = [ - [k, v.name] for k, v in self.test_outputs.items() - ] - - with open( - osp.join(save_dir, 'model.yml'), encoding='utf-8', - mode='w') as f: - yaml.dump(model_info, f) - - # The flag of model for saving successfully - open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model for inference deploy saved in {}.".format(save_dir)) - - def export_quant_model(self, - dataset=None, - save_dir=None, - batch_size=1, - batch_nums=10, - cache_dir=".temp", - quant_type="offline"): - if quant_type == "offline": - self.arrange_transform(transforms=dataset.transforms, 
mode='quant') - dataset.num_samples = batch_size * batch_nums - try: - from utils import HumanSegPostTrainingQuantization - except: - raise Exception( - "Model Quantization is not available, try to upgrade your paddlepaddle>=1.8.1" - ) - is_use_cache_file = True - if cache_dir is None: - is_use_cache_file = False - post_training_quantization = HumanSegPostTrainingQuantization( - executor=self.exe, - dataset=dataset, - program=self.test_prog, - inputs=self.test_inputs, - outputs=self.test_outputs, - batch_size=batch_size, - batch_nums=batch_nums, - scope=None, - algo='KL', - quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], - is_full_quantize=False, - is_use_cache_file=is_use_cache_file, - cache_dir=cache_dir) - post_training_quantization.quantize() - post_training_quantization.save_quantized_model(save_dir) - if cache_dir is not None: - shutil.rmtree(cache_dir) - else: - float_prog, _ = slim.quant.convert( - self.test_prog, self.exe.place, save_int8=True) - test_input_names = [ - var.name for var in list(self.test_inputs.values()) - ] - test_outputs = list(self.test_outputs.values()) - fluid.io.save_inference_model( - dirname=save_dir, - executor=self.exe, - params_filename='__params__', - feeded_var_names=test_input_names, - target_vars=test_outputs, - main_program=float_prog) - - model_info = self.get_model_info() - model_info['status'] = 'Quant' - - # Save input and output descrition of model - model_info['_ModelInputsOutputs'] = dict() - model_info['_ModelInputsOutputs']['test_inputs'] = [ - [k, v.name] for k, v in self.test_inputs.items() - ] - model_info['_ModelInputsOutputs']['test_outputs'] = [ - [k, v.name] for k, v in self.test_outputs.items() - ] - - with open( - osp.join(save_dir, 'model.yml'), encoding='utf-8', - mode='w') as f: - yaml.dump(model_info, f) - - # The flag of model for saving successfully - open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model for quant saved in {}.".format(save_dir)) - - def default_optimizer(self, - learning_rate, - num_epochs, - num_steps_each_epoch, - lr_decay_power=0.9, - regularization_coeff=4e-5): - decay_step = num_epochs * num_steps_each_epoch - lr_decay = fluid.layers.polynomial_decay( - learning_rate, - decay_step, - end_learning_rate=0, - power=lr_decay_power) - optimizer = fluid.optimizer.Momentum( - lr_decay, - momentum=0.9, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=regularization_coeff)) - return optimizer - - def train(self, - num_epochs, - train_dataset, - train_batch_size=2, - eval_dataset=None, - save_interval_epochs=1, - log_interval_steps=2, - save_dir='output', - pretrained_weights=None, - resume_weights=None, - optimizer=None, - learning_rate=0.01, - lr_decay_power=0.9, - regularization_coeff=4e-5, - use_vdl=False, - quant=False): - self.labels = train_dataset.labels - self.train_transforms = train_dataset.transforms - self.train_init = locals() - self.begin_epoch = 0 - - if optimizer is None: - num_steps_each_epoch = train_dataset.num_samples // train_batch_size - optimizer = self.default_optimizer( - learning_rate=learning_rate, - num_epochs=num_epochs, - num_steps_each_epoch=num_steps_each_epoch, - lr_decay_power=lr_decay_power, - regularization_coeff=regularization_coeff) - self.optimizer = optimizer - self.build_program() - self.net_initialize( - startup_prog=fluid.default_startup_program(), - pretrained_weights=pretrained_weights, - resume_weights=resume_weights) - - # 进行量化 - if quant: - # 当 for_test=False ,返回类型为 fluid.CompiledProgram - # 当 for_test=True ,返回类型为 
fluid.Program - self.train_prog = slim.quant.quant_aware( - self.train_prog, self.exe.place, for_test=False) - self.test_prog = slim.quant.quant_aware( - self.test_prog, self.exe.place, for_test=True) - # self.parallel_train_prog = self.train_prog.with_data_parallel( - # loss_name=self.train_outputs['loss'].name) - self.status = 'Quant' - - if self.begin_epoch >= num_epochs: - raise ValueError( - ("begin epoch[{}] is larger than num_epochs[{}]").format( - self.begin_epoch, num_epochs)) - - if not osp.isdir(save_dir): - if osp.exists(save_dir): - os.remove(save_dir) - os.makedirs(save_dir) - - # add arrange op tor transforms - self.arrange_transform( - transforms=train_dataset.transforms, mode='train') - self.build_train_data_loader( - dataset=train_dataset, batch_size=train_batch_size) - - if eval_dataset is not None: - self.eval_transforms = eval_dataset.transforms - self.test_transforms = copy.deepcopy(eval_dataset.transforms) - - lr = self.optimizer._learning_rate - lr.persistable = True - if isinstance(lr, fluid.framework.Variable): - self.train_outputs['lr'] = lr - - # 多卡训练 - if self.parallel_train_prog is None: - build_strategy = fluid.compiler.BuildStrategy() - if self.env_info['place'] != 'cpu' and len(self.places) > 1: - build_strategy.sync_batch_norm = self.sync_bn - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_iteration_per_drop_scope = 1 - if quant: - build_strategy.fuse_all_reduce_ops = False - build_strategy.sync_batch_norm = False - self.parallel_train_prog = self.train_prog.with_data_parallel( - loss_name=self.train_outputs['loss'].name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - else: - self.parallel_train_prog = fluid.CompiledProgram( - self.train_prog).with_data_parallel( - loss_name=self.train_outputs['loss'].name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - total_num_steps = math.floor( - train_dataset.num_samples / train_batch_size) - num_steps = 0 - time_stat = list() - time_train_one_epoch = None - time_eval_one_epoch = None - - total_num_steps_eval = 0 - # eval times - total_eval_times = math.ceil(num_epochs / save_interval_epochs) - eval_batch_size = train_batch_size - if eval_dataset is not None: - total_num_steps_eval = math.ceil( - eval_dataset.num_samples / eval_batch_size) - - if use_vdl: - from visualdl import LogWriter - vdl_logdir = osp.join(save_dir, 'vdl_log') - log_writer = LogWriter(vdl_logdir) - best_miou = -1.0 - best_model_epoch = 1 - for i in range(self.begin_epoch, num_epochs): - records = list() - step_start_time = time.time() - epoch_start_time = time.time() - for step, data in enumerate(self.train_data_loader()): - outputs = self.exe.run( - self.parallel_train_prog, - feed=data, - fetch_list=list(self.train_outputs.values())) - outputs_avg = np.mean(np.array(outputs), axis=1) - records.append(outputs_avg) - - # time estimated to complete the training - currend_time = time.time() - step_cost_time = currend_time - step_start_time - step_start_time = currend_time - if len(time_stat) < 20: - time_stat.append(step_cost_time) - else: - time_stat[num_steps % 20] = step_cost_time - - num_steps += 1 - if num_steps % log_interval_steps == 0: - step_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), outputs_avg)) - - if use_vdl: - for k, v in step_metrics.items(): - log_writer.add_scalar( - step=num_steps, - tag='train/{}'.format(k), - value=v) - - # 计算剩余时间 - avg_step_time = np.mean(time_stat) - if time_train_one_epoch is not None: - eta = (num_epochs - i - 1) * 
time_train_one_epoch + ( - total_num_steps - step - 1) * avg_step_time - else: - eta = ((num_epochs - i) * total_num_steps - step - - 1) * avg_step_time - if time_eval_one_epoch is not None: - eval_eta = (total_eval_times - i // save_interval_epochs - ) * time_eval_one_epoch - else: - eval_eta = (total_eval_times - i // save_interval_epochs - ) * total_num_steps_eval * avg_step_time - eta_str = seconds_to_hms(eta + eval_eta) - - logging.info( - "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}" - .format(i + 1, num_epochs, step + 1, total_num_steps, - dict2str(step_metrics), round(avg_step_time, 2), - eta_str)) - - train_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), np.mean(records, axis=0))) - logging.info('[TRAIN] Epoch {} finished, {} .'.format( - i + 1, dict2str(train_metrics))) - time_train_one_epoch = time.time() - epoch_start_time - - eval_epoch_start_time = time.time() - if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1: - current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1)) - if not osp.isdir(current_save_dir): - os.makedirs(current_save_dir) - if eval_dataset is not None: - self.eval_metrics = self.evaluate( - eval_dataset=eval_dataset, - batch_size=eval_batch_size, - epoch_id=i + 1) - # 保存最优模型 - current_miou = self.eval_metrics['miou'] - if current_miou > best_miou: - best_miou = current_miou - best_model_epoch = i + 1 - best_model_dir = osp.join(save_dir, "best_model") - self.save_model(save_dir=best_model_dir) - if use_vdl: - for k, v in self.eval_metrics.items(): - if isinstance(v, list): - continue - if isinstance(v, np.ndarray): - if v.size > 1: - continue - log_writer.add_scalar( - step=num_steps, - tag='evaluate/{}'.format(k), - value=v) - self.save_model(save_dir=current_save_dir) - time_eval_one_epoch = time.time() - eval_epoch_start_time - if eval_dataset is not None: - logging.info( - 'Current evaluated best model in eval_dataset is epoch_{}, miou={}' - .format(best_model_epoch, best_miou)) - - if quant: - if osp.exists(osp.join(save_dir, "best_model")): - fluid.load( - program=self.test_prog, - model_path=osp.join(save_dir, "best_model"), - executor=self.exe) - self.export_quant_model( - save_dir=osp.join(save_dir, "best_model_export"), - quant_type="online") - - def evaluate(self, eval_dataset, batch_size=1, epoch_id=None): - """评估。 - - Args: - eval_dataset (paddlex.datasets): 评估数据读取器。 - batch_size (int): 评估时的batch大小。默认1。 - epoch_id (int): 当前评估模型所在的训练轮数。 - return_details (bool): 是否返回详细信息。默认False。 - - Returns: - dict: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 - 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 - tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), - 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 - """ - self.arrange_transform(transforms=eval_dataset.transforms, mode='train') - total_steps = math.ceil(eval_dataset.num_samples * 1.0 / batch_size) - conf_mat = ConfusionMatrix(self.num_classes, streaming=True) - data_generator = eval_dataset.generator( - batch_size=batch_size, drop_last=False) - if not hasattr(self, 'parallel_test_prog'): - self.parallel_test_prog = fluid.CompiledProgram( - self.test_prog).with_data_parallel( - share_vars_from=self.parallel_train_prog) - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_dataset.num_samples, total_steps)) - for step, data in tqdm.tqdm( - enumerate(data_generator()), total=total_steps): - images = np.array([d[0] for d in data]) - labels = np.array([d[1] for d in 
data]) - num_samples = images.shape[0] - if num_samples < batch_size: - num_pad_samples = batch_size - num_samples - pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) - images = np.concatenate([images, pad_images]) - feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) - pred = outputs[0] - if num_samples < batch_size: - pred = pred[0:num_samples] - - mask = labels != self.ignore_index - conf_mat.calculate(pred=pred, label=labels, ignore=mask) - _, iou = conf_mat.mean_iou() - - logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format( - epoch_id, step + 1, total_steps, iou)) - - category_iou, miou = conf_mat.mean_iou() - category_acc, macc = conf_mat.accuracy() - - metrics = OrderedDict( - zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'], - [miou, category_iou, macc, category_acc, - conf_mat.kappa()])) - - logging.info('[EVAL] Finished, Epoch={}, {} .'.format( - epoch_id, dict2str(metrics))) - return metrics - - def predict(self, im_file, transforms=None): - """预测。 - Args: - img_file(str|np.ndarray): 预测图像。 - transforms(paddlex.cv.transforms): 数据预处理操作。 - - Returns: - dict: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图, - 像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes) - """ - if isinstance(im_file, str): - if not osp.exists(im_file): - raise ValueError( - 'The Image file does not exist: {}'.format(im_file)) - - if transforms is None and not hasattr(self, 'test_transforms'): - raise Exception("transforms need to be defined, now is None.") - if transforms is not None: - self.arrange_transform(transforms=transforms, mode='test') - im, im_info = transforms(im_file) - else: - self.arrange_transform(transforms=self.test_transforms, mode='test') - im, im_info = self.test_transforms(im_file) - im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) - pred = result[0] - logit = result[1] - logit = np.squeeze(logit) - logit = np.transpose(logit, (1, 2, 0)) - pred = np.squeeze(pred).astype('uint8') - keys = list(im_info.keys()) - for k in keys[::-1]: - if k == 'shape_before_resize': - h, w = im_info[k][0], im_info[k][1] - pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) - logit = cv2.resize(logit, (w, h), cv2.INTER_LINEAR) - elif k == 'shape_before_padding': - h, w = im_info[k][0], im_info[k][1] - pred = pred[0:h, 0:w] - logit = logit[0:h, 0:w, :] - - return {'label_map': pred, 'score_map': logit} - - -class HumanSegLite(SegModel): - # DeepLab ShuffleNet - def build_net(self, mode='train'): - """应根据不同的情况进行构建""" - model = ShuffleSeg( - self.num_classes, - mode=mode, - use_bce_loss=self.use_bce_loss, - use_dice_loss=self.use_dice_loss, - class_weight=self.class_weight, - ignore_index=self.ignore_index) - inputs = model.generate_inputs() - model_out = model.build_net(inputs) - outputs = OrderedDict() - if mode == 'train': - self.optimizer.minimize(model_out) - outputs['loss'] = model_out - else: - outputs['pred'] = model_out[0] - outputs['logit'] = model_out[1] - return inputs, outputs - - -class HumanSegServer(SegModel): - # DeepLab Xception - def __init__(self, - num_classes=2, - backbone='Xception65', - output_stride=16, - aspp_with_sep_conv=True, - decoder_use_sep_conv=True, - encoder_with_aspp=True, - enable_decoder=True, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - super().__init__( - 
num_classes=num_classes, - use_bce_loss=use_bce_loss, - use_dice_loss=use_dice_loss, - class_weight=class_weight, - ignore_index=ignore_index, - sync_bn=sync_bn) - self.init_params = locals() - - self.output_stride = output_stride - - if backbone not in ['Xception65', 'Xception41']: - raise ValueError("backbone: {} is set wrong. it should be one of " - "('Xception65', 'Xception41')".format(backbone)) - - self.backbone = backbone - self.aspp_with_sep_conv = aspp_with_sep_conv - self.decoder_use_sep_conv = decoder_use_sep_conv - self.encoder_with_aspp = encoder_with_aspp - self.enable_decoder = enable_decoder - self.sync_bn = sync_bn - - def build_net(self, mode='train'): - model = DeepLabv3p( - self.num_classes, - mode=mode, - backbone=self.backbone, - output_stride=self.output_stride, - aspp_with_sep_conv=self.aspp_with_sep_conv, - decoder_use_sep_conv=self.decoder_use_sep_conv, - encoder_with_aspp=self.encoder_with_aspp, - enable_decoder=self.enable_decoder, - use_bce_loss=self.use_bce_loss, - use_dice_loss=self.use_dice_loss, - class_weight=self.class_weight, - ignore_index=self.ignore_index) - inputs = model.generate_inputs() - model_out = model.build_net(inputs) - outputs = OrderedDict() - if mode == 'train': - self.optimizer.minimize(model_out) - outputs['loss'] = model_out - else: - outputs['pred'] = model_out[0] - outputs['logit'] = model_out[1] - return inputs, outputs - - -class HumanSegMobile(SegModel): - def __init__(self, - num_classes=2, - stage1_num_modules=1, - stage1_num_blocks=[1], - stage1_num_channels=[32], - stage2_num_modules=1, - stage2_num_blocks=[2, 2], - stage2_num_channels=[16, 32], - stage3_num_modules=1, - stage3_num_blocks=[2, 2, 2], - stage3_num_channels=[16, 32, 64], - stage4_num_modules=1, - stage4_num_blocks=[2, 2, 2, 2], - stage4_num_channels=[16, 32, 64, 128], - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - super().__init__( - num_classes=num_classes, - use_bce_loss=use_bce_loss, - use_dice_loss=use_dice_loss, - class_weight=class_weight, - ignore_index=ignore_index, - sync_bn=sync_bn) - self.init_params = locals() - - self.stage1_num_modules = stage1_num_modules - self.stage1_num_blocks = stage1_num_blocks - self.stage1_num_channels = stage1_num_channels - self.stage2_num_modules = stage2_num_modules - self.stage2_num_blocks = stage2_num_blocks - self.stage2_num_channels = stage2_num_channels - self.stage3_num_modules = stage3_num_modules - self.stage3_num_blocks = stage3_num_blocks - self.stage3_num_channels = stage3_num_channels - self.stage4_num_modules = stage4_num_modules - self.stage4_num_blocks = stage4_num_blocks - self.stage4_num_channels = stage4_num_channels - - def build_net(self, mode='train'): - """应根据不同的情况进行构建""" - model = HRNet( - self.num_classes, - mode=mode, - stage1_num_modules=self.stage1_num_modules, - stage1_num_blocks=self.stage1_num_blocks, - stage1_num_channels=self.stage1_num_channels, - stage2_num_modules=self.stage2_num_modules, - stage2_num_blocks=self.stage2_num_blocks, - stage2_num_channels=self.stage2_num_channels, - stage3_num_modules=self.stage3_num_modules, - stage3_num_blocks=self.stage3_num_blocks, - stage3_num_channels=self.stage3_num_channels, - stage4_num_modules=self.stage4_num_modules, - stage4_num_blocks=self.stage4_num_blocks, - stage4_num_channels=self.stage4_num_channels, - use_bce_loss=self.use_bce_loss, - use_dice_loss=self.use_dice_loss, - class_weight=self.class_weight, - ignore_index=self.ignore_index) - inputs = model.generate_inputs() - 
model_out = model.build_net(inputs) - outputs = OrderedDict() - if mode == 'train': - self.optimizer.minimize(model_out) - outputs['loss'] = model_out - else: - outputs['pred'] = model_out[0] - outputs['logit'] = model_out[1] - return inputs, outputs - - def train(self, - num_epochs, - train_dataset, - train_batch_size=2, - eval_dataset=None, - save_interval_epochs=1, - log_interval_steps=2, - save_dir='output', - pretrained_weights=None, - resume_weights=None, - optimizer=None, - learning_rate=0.01, - lr_decay_power=0.9, - regularization_coeff=5e-4, - use_vdl=False, - quant=False): - super().train( - num_epochs=num_epochs, - train_dataset=train_dataset, - train_batch_size=train_batch_size, - eval_dataset=eval_dataset, - save_interval_epochs=save_interval_epochs, - log_interval_steps=log_interval_steps, - save_dir=save_dir, - pretrained_weights=pretrained_weights, - resume_weights=resume_weights, - optimizer=optimizer, - learning_rate=learning_rate, - lr_decay_power=lr_decay_power, - regularization_coeff=regularization_coeff, - use_vdl=use_vdl, - quant=quant) diff --git a/legacy/contrib/HumanSeg/models/load_model.py b/legacy/contrib/HumanSeg/models/load_model.py deleted file mode 100644 index dadaa2b5c6..0000000000 --- a/legacy/contrib/HumanSeg/models/load_model.py +++ /dev/null @@ -1,92 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
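
The `humanseg.py` file removed above defined three legacy model wrappers: `HumanSegLite` (ShuffleSeg), `HumanSegServer` (DeepLabv3p with an Xception backbone) and `HumanSegMobile` (a small HRNet configuration). For reference, here is a minimal usage sketch based only on the constructor and `train()` signatures visible in the deleted code; the dataset objects and the `models` import path are assumptions and are not defined here:

```python
# Sketch only: mirrors the signatures of the removed legacy HumanSeg API
# (paddle.fluid era). `train_dataset` / `eval_dataset` are assumed to be the
# generator-style readers the old code expected; they are not created here.
from models import HumanSegMobile  # package layout of the removed code

model = HumanSegMobile(num_classes=2)          # binary person/background
model.train(
    num_epochs=100,
    train_dataset=train_dataset,               # assumed reader object
    train_batch_size=2,
    eval_dataset=eval_dataset,                 # optional; enables best-model saving
    save_interval_epochs=1,
    save_dir='output',
    learning_rate=0.01,
    use_vdl=True,                              # log metrics to VisualDL
)
```

This block is illustrative only; the whole `legacy/contrib/HumanSeg` package is being deleted in this change.
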
- -import yaml -import os.path as osp -import six -import copy -from collections import OrderedDict -import paddle.fluid as fluid -import utils.logging as logging -import models - - -def load_model(model_dir): - if not osp.exists(osp.join(model_dir, "model.yml")): - raise Exception("There's no model.yml in {}".format(model_dir)) - with open(osp.join(model_dir, "model.yml")) as f: - info = yaml.load(f.read(), Loader=yaml.Loader) - status = info['status'] - - if not hasattr(models, info['Model']): - raise Exception("There's no attribute {} in models".format( - info['Model'])) - model = getattr(models, info['Model'])(**info['_init_params']) - if status in ["Normal", "QuantOnline"]: - startup_prog = fluid.Program() - model.test_prog = fluid.Program() - with fluid.program_guard(model.test_prog, startup_prog): - with fluid.unique_name.guard(): - model.test_inputs, model.test_outputs = model.build_net( - mode='test') - model.test_prog = model.test_prog.clone(for_test=True) - if status == "QuantOnline": - print('test quant online') - import paddleslim as slim - model.test_prog = slim.quant.quant_aware( - model.test_prog, model.exe.place, for_test=True) - model.exe.run(startup_prog) - fluid.load(model.test_prog, osp.join(model_dir, 'model')) - if status == "QuantOnline": - model.test_prog = slim.quant.convert(model.test_prog, - model.exe.place) - - elif status in ['Infer', 'Quant']: - [prog, input_names, outputs] = fluid.io.load_inference_model( - model_dir, model.exe, params_filename='__params__') - model.test_prog = prog - test_outputs_info = info['_ModelInputsOutputs']['test_outputs'] - model.test_inputs = OrderedDict() - model.test_outputs = OrderedDict() - for name in input_names: - model.test_inputs[name] = model.test_prog.global_block().var(name) - for i, out in enumerate(outputs): - var_desc = test_outputs_info[i] - model.test_outputs[var_desc[0]] = out - if 'test_transforms' in info: - model.test_transforms = build_transforms(info['test_transforms']) - model.eval_transforms = copy.deepcopy(model.test_transforms) - - if '_Attributes' in info: - for k, v in info['_Attributes'].items(): - if k in model.__dict__: - model.__dict__[k] = v - - logging.info("Model[{}] loaded.".format(info['Model'])) - return model - - -def build_transforms(transforms_info): - import transforms as T - transforms = list() - for op_info in transforms_info: - op_name = list(op_info.keys())[0] - op_attr = op_info[op_name] - if not hasattr(T, op_name): - raise Exception( - "There's no operator named '{}' in transforms".format(op_name)) - transforms.append(getattr(T, op_name)(**op_attr)) - eval_transforms = T.Compose(transforms) - return eval_transforms diff --git a/legacy/contrib/HumanSeg/nets/__init__.py b/legacy/contrib/HumanSeg/nets/__init__.py deleted file mode 100644 index cb0cc37b82..0000000000 --- a/legacy/contrib/HumanSeg/nets/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
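
The removed `load_model.py` above rebuilt a saved model from its `model.yml` (network class, init params, transforms, attributes) and loaded the matching weights, handling `Normal`, `QuantOnline`, `Infer` and `Quant` checkpoints. A minimal sketch of how it was typically paired with the `predict()` method shown earlier; the import path and the checkpoint/image paths are placeholders inferred from the removed file layout:

```python
# Sketch only: usage of the removed load_model()/predict() pair.
# 'output/best_model' must contain the model.yml and parameters written by
# the legacy save_model(); the import path is assumed from the old layout.
from models.load_model import load_model
import cv2

model = load_model('output/best_model')
result = model.predict('human.jpg')        # returns {'label_map', 'score_map'}
mask = result['label_map']                 # uint8 map of per-pixel class ids
cv2.imwrite('mask.png', mask * 255)        # quick visualization for 2 classes
```
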
- -from .backbone import mobilenet_v2 -from .backbone import xception -from .deeplabv3p import DeepLabv3p -from .shufflenet_slim import ShuffleSeg -from .hrnet import HRNet diff --git a/legacy/contrib/HumanSeg/nets/backbone/__init__.py b/legacy/contrib/HumanSeg/nets/backbone/__init__.py deleted file mode 100644 index 338ee31482..0000000000 --- a/legacy/contrib/HumanSeg/nets/backbone/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .mobilenet_v2 import MobileNetV2 -from .xception import Xception diff --git a/legacy/contrib/HumanSeg/nets/backbone/mobilenet_v2.py b/legacy/contrib/HumanSeg/nets/backbone/mobilenet_v2.py deleted file mode 100644 index 3e95026196..0000000000 --- a/legacy/contrib/HumanSeg/nets/backbone/mobilenet_v2.py +++ /dev/null @@ -1,244 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
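
The deleted `nets` package collected the network definitions that the wrappers in `humanseg.py` were built on. The correspondence, taken directly from the removed code, is summarized below; the dictionary name is purely illustrative and not part of the removed package:

```python
# Wrapper class (models/humanseg.py)  ->  network it built (nets/)
LEGACY_HUMANSEG_NETS = {
    "HumanSegLite":   "ShuffleSeg",   # ShuffleNet-slim encoder-decoder
    "HumanSegServer": "DeepLabv3p",   # Xception65/41 backbone
    "HumanSegMobile": "HRNet",        # small HRNet configuration
}
```
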
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - - -class MobileNetV2: - def __init__(self, - num_classes=None, - scale=1.0, - output_stride=None, - end_points=None, - decode_points=None): - self.scale = scale - self.num_classes = num_classes - self.output_stride = output_stride - self.end_points = end_points - self.decode_points = decode_points - self.bottleneck_params_list = [(1, 16, 1, 1), (6, 24, 2, 2), - (6, 32, 3, 2), (6, 64, 4, 2), - (6, 96, 3, 1), (6, 160, 3, 2), - (6, 320, 1, 1)] - self.modify_bottle_params(output_stride) - - def __call__(self, input): - scale = self.scale - decode_ends = dict() - - def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - # conv1 - input = self.conv_bn_layer( - input, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - if_act=True, - name='conv1_1') - - layer_count = 1 - - if check_points(layer_count, self.decode_points): - decode_ends[layer_count] = input - - if check_points(layer_count, self.end_points): - return input, decode_ends - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in self.bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input, depthwise_output = self.invresi_blocks( - input=input, - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name='conv' + str(i)) - in_c = int(c * scale) - layer_count += n - - if check_points(layer_count, self.decode_points): - decode_ends[layer_count] = depthwise_output - - if check_points(layer_count, self.end_points): - return input, decode_ends - - # last_conv - output = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - if self.num_classes is not None: - output = fluid.layers.pool2d( - input=output, pool_type='avg', global_pooling=True) - - output = fluid.layers.fc( - input=output, - size=self.num_classes, - param_attr=ParamAttr(name='fc10_weights'), - bias_attr=ParamAttr(name='fc10_offset')) - return output - - def modify_bottle_params(self, output_stride=None): - if output_stride is not None and output_stride % 2 != 0: - raise Exception("output stride must to be even number") - if output_stride is None: - return - else: - stride = 2 - for i, layer_setting in enumerate(self.bottleneck_params_list): - t, c, n, s = layer_setting - stride = stride * s - if stride > output_stride: - s = 1 - self.bottleneck_params_list[i] = (t, c, n, s) - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return 
fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - depthwise_output = bottleneck_conv - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out, depthwise_output - else: - return linear_out, depthwise_output - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block, depthwise_output = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block, depthwise_output = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block, depthwise_output diff --git a/legacy/contrib/HumanSeg/nets/backbone/xception.py b/legacy/contrib/HumanSeg/nets/backbone/xception.py deleted file mode 100644 index 00a734eb81..0000000000 --- a/legacy/contrib/HumanSeg/nets/backbone/xception.py +++ /dev/null @@ -1,321 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
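
The removed `MobileNetV2.modify_bottle_params()` capped the network's cumulative downsampling at the requested `output_stride` by forcing the strides of later bottleneck blocks to 1. A standalone sketch of the same logic (pure Python, no Paddle dependency), handy for checking which blocks keep their stride for a given `output_stride`:

```python
def clamp_strides(bottleneck_params, output_stride):
    """Replicates modify_bottle_params() from the removed mobilenet_v2.py:
    once the cumulative stride (starting at 2 for conv1) would exceed
    output_stride, the remaining blocks are forced to stride 1."""
    if output_stride is not None and output_stride % 2 != 0:
        raise ValueError("output stride must be an even number")
    if output_stride is None:
        return bottleneck_params
    stride = 2  # conv1 already downsamples by 2
    out = []
    for t, c, n, s in bottleneck_params:
        stride *= s
        if stride > output_stride:
            s = 1
        out.append((t, c, n, s))
    return out

# Defaults from the removed code (expansion t, channels c, repeats n, stride s):
params = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2),
          (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1)]
print(clamp_strides(params, output_stride=16))  # the last stride-2 block becomes stride 1
```
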
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import math -import paddle.fluid as fluid -from nets.libs import scope, name_scope -from nets.libs import bn, bn_relu, relu -from nets.libs import conv -from nets.libs import separate_conv - -__all__ = ['xception_65', 'xception_41', 'xception_71'] - - -def check_data(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def check_stride(s, os): - if s <= os: - return True - else: - return False - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - -class Xception(): - def __init__(self, - num_classes=None, - layers=65, - output_stride=32, - end_points=None, - decode_points=None): - self.backbone = 'xception_' + str(layers) - self.num_classes = num_classes - self.output_stride = output_stride - self.end_points = end_points - self.decode_points = decode_points - self.bottleneck_params = self.gen_bottleneck_params(self.backbone) - - def __call__( - self, - input, - ): - self.stride = 2 - self.block_point = 0 - self.short_cuts = dict() - with scope(self.backbone): - # Entry flow - data = self.entry_flow(input) - if check_points(self.block_point, self.end_points): - return data, self.short_cuts - - # Middle flow - data = self.middle_flow(data) - if check_points(self.block_point, self.end_points): - return data, self.short_cuts - - # Exit flow - data = self.exit_flow(data) - if check_points(self.block_point, self.end_points): - return data, self.short_cuts - - if self.num_classes is not None: - data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True) - data = fluid.layers.dropout(data, 0.5) - stdv = 1.0 / math.sqrt(data.shape[1] * 1.0) - with scope("logit"): - out = fluid.layers.fc( - input=data, - size=self.num_classes, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - name='weights', - initializer=fluid.initializer.Uniform(-stdv, stdv)), - bias_attr=fluid.param_attr.ParamAttr(name='bias')) - - return out - else: - return data - - def gen_bottleneck_params(self, backbone='xception_65'): - if backbone == 'xception_65': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - elif backbone == 'xception_41': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (8, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - elif backbone == 'xception_71': - bottleneck_params = { - "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - else: - raise Exception( - "xception backbont only support xception_41/xception_65/xception_71" - ) - return bottleneck_params - - def entry_flow(self, data): - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09)) - with scope("entry_flow"): - with scope("conv1"): - data = bn_relu( - conv( - data, 32, 3, stride=2, padding=1, - param_attr=param_attr), - eps=1e-3) - with scope("conv2"): - data = bn_relu( - conv( - data, 64, 3, stride=1, padding=1, - param_attr=param_attr), - eps=1e-3) - - # get entry flow params - block_num = 
self.bottleneck_params["entry_flow"][0] - strides = self.bottleneck_params["entry_flow"][1] - chns = self.bottleneck_params["entry_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("entry_flow"): - for i in range(block_num): - block_point = block_point + 1 - with scope("block" + str(i + 1)): - stride = strides[i] if check_stride(s * strides[i], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, chns[i], [1, 1, stride]) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def middle_flow(self, data): - block_num = self.bottleneck_params["middle_flow"][0] - strides = self.bottleneck_params["middle_flow"][1] - chns = self.bottleneck_params["middle_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("middle_flow"): - for i in range(block_num): - block_point = block_point + 1 - with scope("block" + str(i + 1)): - stride = strides[i] if check_stride(s * strides[i], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, chns[i], [1, 1, strides[i]], skip_conv=False) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def exit_flow(self, data): - block_num = self.bottleneck_params["exit_flow"][0] - strides = self.bottleneck_params["exit_flow"][1] - chns = self.bottleneck_params["exit_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - assert (block_num == 2) - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("exit_flow"): - with scope('block1'): - block_point += 1 - stride = strides[0] if check_stride(s * strides[0], - output_stride) else 1 - data, short_cuts = self.xception_block(data, chns[0], - [1, 1, stride]) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - with scope('block2'): - block_point += 1 - stride = strides[1] if check_stride(s * strides[1], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, - chns[1], [1, 1, stride], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def xception_block(self, - input, - channels, - strides=1, - filters=3, - dilation=1, - skip_conv=True, - has_skip=True, - activation_fn_in_separable_conv=False): - repeat_number = 3 - channels = check_data(channels, repeat_number) - filters = check_data(filters, repeat_number) - strides = check_data(strides, repeat_number) - data = input - results = [] - for i in range(repeat_number): - with scope('separable_conv' + str(i + 1)): - if not activation_fn_in_separable_conv: - data = relu(data) - data = separate_conv( - data, - channels[i], - strides[i], - filters[i], - dilation=dilation, - eps=1e-3) - else: - data = separate_conv( - data, - channels[i], - 
strides[i], - filters[i], - dilation=dilation, - act=relu, - eps=1e-3) - results.append(data) - if not has_skip: - return data, results - if skip_conv: - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal( - loc=0.0, scale=0.09)) - with scope('shortcut'): - skip = bn( - conv( - input, - channels[-1], - 1, - strides[-1], - groups=1, - padding=0, - param_attr=param_attr), - eps=1e-3) - else: - skip = input - return data + skip, results - - -def xception_65(num_classes=None): - model = Xception(num_classes, 65) - return model - - -def xception_41(num_classes=None): - model = Xception(num_classes, 41) - return model - - -def xception_71(num_classes=None): - model = Xception(num_classes, 71) - return model diff --git a/legacy/contrib/HumanSeg/nets/deeplabv3p.py b/legacy/contrib/HumanSeg/nets/deeplabv3p.py deleted file mode 100644 index d076c808db..0000000000 --- a/legacy/contrib/HumanSeg/nets/deeplabv3p.py +++ /dev/null @@ -1,415 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid -from .libs import scope, name_scope -from .libs import bn_relu, relu -from .libs import conv -from .libs import separate_conv -from .libs import sigmoid_to_softmax -from .seg_modules import softmax_with_loss -from .seg_modules import dice_loss -from .seg_modules import bce_loss -from .backbone import MobileNetV2 -from .backbone import Xception - - -class DeepLabv3p(object): - """实现DeepLabv3+模型 - `"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation" - ` - - Args: - num_classes (int): 类别数。 - backbone (str): DeepLabv3+的backbone网络,实现特征图的计算,取值范围为['Xception65', 'Xception41', - 'MobileNetV2_x0.25', 'MobileNetV2_x0.5', 'MobileNetV2_x1.0', 'MobileNetV2_x1.5', - 'MobileNetV2_x2.0']。默认'MobileNetV2_x1.0'。 - mode (str): 网络运行模式,根据mode构建网络的输入和返回。 - 当mode为'train'时,输入为image(-1, 3, -1, -1)和label (-1, 1, -1, -1) 返回loss。 - 当mode为'train'时,输入为image (-1, 3, -1, -1)和label (-1, 1, -1, -1),返回loss, - pred (与网络输入label 相同大小的预测结果,值代表相应的类别),label,mask(非忽略值的mask, - 与label相同大小,bool类型)。 - 当mode为'test'时,输入为image(-1, 3, -1, -1)返回pred (-1, 1, -1, -1)和 - logit (-1, num_classes, -1, -1) 通道维上代表每一类的概率值。 - output_stride (int): backbone 输出特征图相对于输入的下采样倍数,一般取值为8或16。 - aspp_with_sep_conv (bool): 在asspp模块是否采用separable convolutions。 - decoder_use_sep_conv (bool): decoder模块是否采用separable convolutions。 - encoder_with_aspp (bool): 是否在encoder阶段采用aspp模块。 - enable_decoder (bool): 是否使用decoder模块。 - use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。 - use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 - 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。 - class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 - num_classes。当class_weight为str时, 
weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 - 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1, - 即平时使用的交叉熵损失函数。 - ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。 - - Raises: - ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。 - ValueError: class_weight为list, 但长度不等于num_class。 - class_weight为str, 但class_weight.low()不等于dynamic。 - TypeError: class_weight不为None时,其类型不是list或str。 - """ - - def __init__(self, - num_classes, - backbone='MobileNetV2_x1.0', - mode='train', - output_stride=16, - aspp_with_sep_conv=True, - decoder_use_sep_conv=True, - encoder_with_aspp=True, - enable_decoder=True, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255): - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. - format(type(class_weight))) - - self.num_classes = num_classes - self.backbone = backbone - self.mode = mode - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.output_stride = output_stride - self.aspp_with_sep_conv = aspp_with_sep_conv - self.decoder_use_sep_conv = decoder_use_sep_conv - self.encoder_with_aspp = encoder_with_aspp - self.enable_decoder = enable_decoder - - def _get_backbone(self, backbone): - def mobilenetv2(backbone): - # backbone: xception结构配置 - # output_stride:下采样倍数 - # end_points: mobilenetv2的block数 - # decode_point: 从mobilenetv2中引出分支所在block数, 作为decoder输入 - if '0.25' in backbone: - scale = 0.25 - elif '0.5' in backbone: - scale = 0.5 - elif '1.0' in backbone: - scale = 1.0 - elif '1.5' in backbone: - scale = 1.5 - elif '2.0' in backbone: - scale = 2.0 - end_points = 18 - decode_points = 4 - return MobileNetV2( - scale=scale, - output_stride=self.output_stride, - end_points=end_points, - decode_points=decode_points) - - def xception(backbone): - # decode_point: 从Xception中引出分支所在block数,作为decoder输入 - # end_point:Xception的block数 - if '65' in backbone: - decode_points = 2 - end_points = 21 - layers = 65 - if '41' in backbone: - decode_points = 2 - end_points = 13 - layers = 41 - if '71' in backbone: - decode_points = 3 - end_points = 23 - layers = 71 - return Xception( - layers=layers, - output_stride=self.output_stride, - end_points=end_points, - decode_points=decode_points) - - if 'Xception' in backbone: - return xception(backbone) - elif 'MobileNetV2' in backbone: - return mobilenetv2(backbone) - - def _encoder(self, input): - # 编码器配置,采用ASPP架构,pooling + 1x1_conv + 三个不同尺度的空洞卷积并行, concat后1x1conv - # ASPP_WITH_SEP_CONV:默认为真,使用depthwise可分离卷积,否则使用普通卷积 - # OUTPUT_STRIDE: 下采样倍数,8或16,决定aspp_ratios大小 - # aspp_ratios:ASPP模块空洞卷积的采样率 - - if self.output_stride == 16: - aspp_ratios = [6, 12, 18] - elif self.output_stride == 8: - aspp_ratios = [12, 24, 36] - else: - raise Exception("DeepLabv3p only support stride 8 or 16") - - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with 
scope('encoder'): - channel = 256 - with scope("image_pool"): - image_avg = fluid.layers.reduce_mean( - input, [2, 3], keep_dim=True) - image_avg = bn_relu( - conv( - image_avg, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - input_shape = fluid.layers.shape(input) - image_avg = fluid.layers.resize_bilinear( - image_avg, input_shape[2:]) - - with scope("aspp0"): - aspp0 = bn_relu( - conv( - input, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - with scope("aspp1"): - if self.aspp_with_sep_conv: - aspp1 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[0], act=relu) - else: - aspp1 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[0], - padding=aspp_ratios[0], - param_attr=param_attr)) - with scope("aspp2"): - if self.aspp_with_sep_conv: - aspp2 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[1], act=relu) - else: - aspp2 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[1], - padding=aspp_ratios[1], - param_attr=param_attr)) - with scope("aspp3"): - if self.aspp_with_sep_conv: - aspp3 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[2], act=relu) - else: - aspp3 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[2], - padding=aspp_ratios[2], - param_attr=param_attr)) - with scope("concat"): - data = fluid.layers.concat( - [image_avg, aspp0, aspp1, aspp2, aspp3], axis=1) - data = bn_relu( - conv( - data, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - data = fluid.layers.dropout(data, 0.9) - return data - - def _decoder(self, encode_data, decode_shortcut): - # 解码器配置 - # encode_data:编码器输出 - # decode_shortcut: 从backbone引出的分支, resize后与encode_data concat - # decoder_use_sep_conv: 默认为真,则concat后连接两个可分离卷积,否则为普通卷积 - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('decoder'): - with scope('concat'): - decode_shortcut = bn_relu( - conv( - decode_shortcut, - 48, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - - decode_shortcut_shape = fluid.layers.shape(decode_shortcut) - encode_data = fluid.layers.resize_bilinear( - encode_data, decode_shortcut_shape[2:]) - encode_data = fluid.layers.concat( - [encode_data, decode_shortcut], axis=1) - if self.decoder_use_sep_conv: - with scope("separable_conv1"): - encode_data = separate_conv( - encode_data, 256, 1, 3, dilation=1, act=relu) - with scope("separable_conv2"): - encode_data = separate_conv( - encode_data, 256, 1, 3, dilation=1, act=relu) - else: - with scope("decoder_conv1"): - encode_data = bn_relu( - conv( - encode_data, - 256, - stride=1, - filter_size=3, - dilation=1, - padding=1, - param_attr=param_attr)) - with scope("decoder_conv2"): - encode_data = bn_relu( - conv( - encode_data, - 256, - stride=1, - filter_size=3, - dilation=1, - padding=1, - param_attr=param_attr)) - return encode_data - - def _get_loss(self, logit, label, mask): - avg_loss = 0 - if not (self.use_dice_loss or self.use_bce_loss): - avg_loss += softmax_with_loss( - logit, - label, - mask, - num_classes=self.num_classes, - weight=self.class_weight, - ignore_index=self.ignore_index) - else: - if self.use_dice_loss: - avg_loss += dice_loss(logit, label, mask) - if self.use_bce_loss: - avg_loss += bce_loss( - logit, label, mask, ignore_index=self.ignore_index) - - return avg_loss - - def generate_inputs(self): - inputs = 
OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', shape=[None, 3, None, None], name='image') - if self.mode == 'train': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - return inputs - - def build_net(self, inputs): - # 在两类分割情况下,当loss函数选择dice_loss或bce_loss的时候,最后logit输出通道数设置为1 - if self.use_dice_loss or self.use_bce_loss: - self.num_classes = 1 - image = inputs['image'] - - backbone_net = self._get_backbone(self.backbone) - data, decode_shortcuts = backbone_net(image) - decode_shortcut = decode_shortcuts[backbone_net.decode_points] - - # 编码器解码器设置 - if self.encoder_with_aspp: - data = self._encoder(data) - if self.enable_decoder: - data = self._decoder(data, decode_shortcut) - - # 根据类别数设置最后一个卷积层输出,并resize到图片原始尺寸 - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - with scope('logit'): - with fluid.name_scope('last_conv'): - logit = conv( - data, - self.num_classes, - 1, - stride=1, - padding=0, - bias_attr=True, - param_attr=param_attr) - image_shape = fluid.layers.shape(image) - logit = fluid.layers.resize_bilinear(logit, image_shape[2:]) - - if self.num_classes == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if self.mode == 'train': - label = inputs['label'] - mask = label != self.ignore_index - return self._get_loss(logit, label, mask) - - else: - if self.num_classes == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = fluid.layers.softmax(logit, axis=1) - return pred, logit - - return logit diff --git a/legacy/contrib/HumanSeg/nets/hrnet.py b/legacy/contrib/HumanSeg/nets/hrnet.py deleted file mode 100644 index 52e4949b9d..0000000000 --- a/legacy/contrib/HumanSeg/nets/hrnet.py +++ /dev/null @@ -1,451 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
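
The removed `DeepLabv3p` class (like `HRNet` and `ShuffleSeg` below) exposed the same two-step static-graph API: `generate_inputs()` creates the feed placeholders and `build_net(inputs)` returns a loss in `'train'` mode or `(pred, logit)` in `'test'` mode. A minimal inference-wiring sketch following the pattern used by the removed `load_model.py`; the `nets` import path and the checkpoint path are placeholders:

```python
import paddle.fluid as fluid
from nets import DeepLabv3p  # package layout of the removed code

net = DeepLabv3p(num_classes=2, backbone='Xception65', mode='test')

startup_prog, test_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(test_prog, startup_prog):
    with fluid.unique_name.guard():
        inputs = net.generate_inputs()        # {'image': float32 NCHW placeholder}
        pred, logit = net.build_net(inputs)   # argmax map and per-class probabilities
test_prog = test_prog.clone(for_test=True)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
fluid.load(test_prog, 'output/best_model/model', executor=exe)  # placeholder path
```
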
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from .seg_modules import softmax_with_loss -from .seg_modules import dice_loss -from .seg_modules import bce_loss -from .libs import sigmoid_to_softmax - - -class HRNet(object): - def __init__(self, - num_classes, - mode='train', - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[18, 36], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[18, 36, 72], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[18, 36, 72, 144], - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255): - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. - format(type(class_weight))) - - self.num_classes = num_classes - self.mode = mode - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.stage1_num_modules = stage1_num_modules - self.stage1_num_blocks = stage1_num_blocks - self.stage1_num_channels = stage1_num_channels - self.stage2_num_modules = stage2_num_modules - self.stage2_num_blocks = stage2_num_blocks - self.stage2_num_channels = stage2_num_channels - self.stage3_num_modules = stage3_num_modules - self.stage3_num_blocks = stage3_num_blocks - self.stage3_num_channels = stage3_num_channels - self.stage4_num_modules = stage4_num_modules - self.stage4_num_blocks = stage4_num_blocks - self.stage4_num_channels = stage4_num_channels - - def build_net(self, inputs): - if self.use_dice_loss or self.use_bce_loss: - self.num_classes = 1 - image = inputs['image'] - logit = self._high_resolution_net(image, self.num_classes) - if self.num_classes == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if self.mode == 'train': - label = inputs['label'] - mask = label != self.ignore_index - return self._get_loss(logit, label, mask) - - else: - if self.num_classes == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = fluid.layers.softmax(logit, axis=1) - return pred, logit - - return logit - - def generate_inputs(self): - inputs = OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', shape=[None, 3, None, None], name='image') - if self.mode == 'train': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - return inputs - - def _get_loss(self, logit, label, mask): - 
avg_loss = 0 - if not (self.use_dice_loss or self.use_bce_loss): - avg_loss += softmax_with_loss( - logit, - label, - mask, - num_classes=self.num_classes, - weight=self.class_weight, - ignore_index=self.ignore_index) - else: - if self.use_dice_loss: - avg_loss += dice_loss(logit, label, mask) - if self.use_bce_loss: - avg_loss += bce_loss( - logit, label, mask, ignore_index=self.ignore_index) - - return avg_loss - - def _conv_bn_layer(self, - input, - filter_size, - num_filters, - stride=1, - padding=1, - num_groups=1, - if_act=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=num_groups, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr( - name=bn_name + "_scale", - initializer=fluid.initializer.Constant(1.0)), - bias_attr=ParamAttr( - name=bn_name + "_offset", - initializer=fluid.initializer.Constant(0.0)), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - bn = fluid.layers.relu(bn) - return bn - - def _basic_block(self, - input, - num_filters, - stride=1, - downsample=False, - name=None): - residual = input - conv = self._conv_bn_layer( - input=input, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv1') - conv = self._conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - if_act=False, - name=name + '_conv2') - if downsample: - residual = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - def _bottleneck_block(self, - input, - num_filters, - stride=1, - downsample=False, - name=None): - residual = input - conv = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - name=name + '_conv1') - conv = self._conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv2') - conv = self._conv_bn_layer( - input=conv, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_conv3') - if downsample: - residual = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - def _fuse_layers(self, x, channels, multi_scale_output=True, name=None): - out = [] - for i in range(len(channels) if multi_scale_output else 1): - residual = x[i] - shape = fluid.layers.shape(residual)[-2:] - for j in range(len(channels)): - if j > i: - y = self._conv_bn_layer( - x[j], - filter_size=1, - num_filters=channels[i], - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) - y = fluid.layers.resize_bilinear(input=y, out_shape=shape) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - elif j < i: - y = x[j] - for k in range(i - j): - if k == i - j - 1: - y = self._conv_bn_layer( - y, - filter_size=3, - num_filters=channels[i], - stride=2, - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1)) - else: - y = self._conv_bn_layer( - y, - filter_size=3, - num_filters=channels[j], - stride=2, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1)) - residual = 
fluid.layers.elementwise_add( - x=residual, y=y, act=None) - - residual = fluid.layers.relu(residual) - out.append(residual) - return out - - def _branches(self, x, block_num, channels, name=None): - out = [] - for i in range(len(channels)): - residual = x[i] - for j in range(block_num[i]): - residual = self._basic_block( - residual, - channels[i], - name=name + '_branch_layer_' + str(i + 1) + '_' + - str(j + 1)) - out.append(residual) - return out - - def _high_resolution_module(self, - x, - blocks, - channels, - multi_scale_output=True, - name=None): - residual = self._branches(x, blocks, channels, name=name) - out = self._fuse_layers( - residual, - channels, - multi_scale_output=multi_scale_output, - name=name) - return out - - def _transition_layer(self, x, in_channels, out_channels, name=None): - num_in = len(in_channels) - num_out = len(out_channels) - out = [] - for i in range(num_out): - if i < num_in: - if in_channels[i] != out_channels[i]: - residual = self._conv_bn_layer( - x[i], - filter_size=3, - num_filters=out_channels[i], - name=name + '_layer_' + str(i + 1)) - out.append(residual) - else: - out.append(x[i]) - else: - residual = self._conv_bn_layer( - x[-1], - filter_size=3, - num_filters=out_channels[i], - stride=2, - name=name + '_layer_' + str(i + 1)) - out.append(residual) - return out - - def _stage(self, - x, - num_modules, - num_blocks, - num_channels, - multi_scale_output=True, - name=None): - out = x - for i in range(num_modules): - if i == num_modules - 1 and multi_scale_output == False: - out = self._high_resolution_module( - out, - num_blocks, - num_channels, - multi_scale_output=False, - name=name + '_' + str(i + 1)) - else: - out = self._high_resolution_module( - out, num_blocks, num_channels, name=name + '_' + str(i + 1)) - - return out - - def _layer1(self, input, num_modules, num_blocks, num_channels, name=None): - # num_modules 默认为1,是否增加处理,官网实现为[1],是否对齐。 - conv = input - for i in range(num_blocks[0]): - conv = self._bottleneck_block( - conv, - num_filters=num_channels[0], - downsample=True if i == 0 else False, - name=name + '_' + str(i + 1)) - return conv - - def _high_resolution_net(self, input, num_classes): - x = self._conv_bn_layer( - input=input, - filter_size=3, - num_filters=self.stage1_num_channels[0], - stride=2, - if_act=True, - name='layer1_1') - x = self._conv_bn_layer( - input=x, - filter_size=3, - num_filters=self.stage1_num_channels[0], - stride=2, - if_act=True, - name='layer1_2') - - la1 = self._layer1( - x, - self.stage1_num_modules, - self.stage1_num_blocks, - self.stage1_num_channels, - name='layer2') - tr1 = self._transition_layer([la1], - self.stage1_num_channels, - self.stage2_num_channels, - name='tr1') - st2 = self._stage( - tr1, - self.stage2_num_modules, - self.stage2_num_blocks, - self.stage2_num_channels, - name='st2') - tr2 = self._transition_layer( - st2, self.stage2_num_channels, self.stage3_num_channels, name='tr2') - st3 = self._stage( - tr2, - self.stage3_num_modules, - self.stage3_num_blocks, - self.stage3_num_channels, - name='st3') - tr3 = self._transition_layer( - st3, self.stage3_num_channels, self.stage4_num_channels, name='tr3') - st4 = self._stage( - tr3, - self.stage4_num_modules, - self.stage4_num_blocks, - self.stage4_num_channels, - name='st4') - - # upsample - shape = fluid.layers.shape(st4[0])[-2:] - st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape) - st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape) - st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=shape) - - out = 
fluid.layers.concat(st4, axis=1) - last_channels = sum(self.stage4_num_channels) - - out = self._conv_bn_layer( - input=out, - filter_size=1, - num_filters=last_channels, - stride=1, - if_act=True, - name='conv-2') - out = fluid.layers.conv2d( - input=out, - num_filters=num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name='conv-1_weights'), - bias_attr=False) - - input_shape = fluid.layers.shape(input)[-2:] - out = fluid.layers.resize_bilinear(out, input_shape) - - return out diff --git a/legacy/contrib/HumanSeg/nets/libs.py b/legacy/contrib/HumanSeg/nets/libs.py deleted file mode 100644 index f74c93fc3f..0000000000 --- a/legacy/contrib/HumanSeg/nets/libs.py +++ /dev/null @@ -1,219 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -import contextlib - -bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) -name_scope = "" - - -@contextlib.contextmanager -def scope(name): - global name_scope - bk = name_scope - name_scope = name_scope + name + '/' - yield - name_scope = bk - - -def max_pool(input, kernel, stride, padding): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='max', - pool_stride=stride, - pool_padding=padding) - return data - - -def avg_pool(input, kernel, stride, padding=0): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='avg', - pool_stride=stride, - pool_padding=padding) - return data - - -def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): - N, C, H, W = input.shape - if C % G != 0: - for d in range(10): - for t in [d, -d]: - if G + t <= 0: continue - if C % (G + t) == 0: - G = G + t - break - if C % G == 0: - break - assert C % G == 0, "group can not divide channle" - x = fluid.layers.group_norm( - input, - groups=G, - param_attr=param_attr, - bias_attr=bias_attr, - name=name_scope + 'group_norm') - return x - - -def bn(*args, - norm_type='bn', - eps=1e-5, - bn_momentum=0.99, - group_norm=32, - **kargs): - - if norm_type == 'bn': - with scope('BatchNorm'): - return fluid.layers.batch_norm( - *args, - epsilon=eps, - momentum=bn_momentum, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer), - moving_mean_name=name_scope + 'moving_mean', - moving_variance_name=name_scope + 'moving_variance', - **kargs) - elif norm_type == 'gn': - with scope('GroupNorm'): - return group_norm( - args[0], - group_norm, - eps=eps, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer)) - else: - raise Exception("Unsupport norm type:" + norm_type) - - -def bn_relu(data, 
norm_type='bn', eps=1e-5): - return fluid.layers.relu(bn(data, norm_type=norm_type, eps=eps)) - - -def relu(data): - return fluid.layers.relu(data) - - -def conv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = fluid.ParamAttr( - name=name_scope + 'biases', - regularizer=None, - initializer=fluid.initializer.ConstantInitializer(value=0.0)) - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d(*args, **kargs) - - -def deconv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = name_scope + 'biases' - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d_transpose(*args, **kargs) - - -def separate_conv(input, - channel, - stride, - filter, - dilation=1, - act=None, - eps=1e-5): - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) - with scope('depthwise'): - input = conv( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - dilation=dilation, - use_cudnn=False, - param_attr=param_attr) - input = bn(input, eps=eps) - if act: input = act(input) - - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('pointwise'): - input = conv( - input, channel, 1, 1, groups=1, padding=0, param_attr=param_attr) - input = bn(input, eps=eps) - if act: input = act(input) - return input - - -def conv_bn_layer(input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=fluid.ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=fluid.ParamAttr(name=bn_name + "_scale"), - bias_attr=fluid.ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - -def sigmoid_to_softmax(input): - """ - one channel to two channel - """ - logit = fluid.layers.sigmoid(input) - logit_back = 1 - logit - logit = fluid.layers.concat([logit_back, logit], axis=1) - return logit diff --git a/legacy/contrib/HumanSeg/nets/seg_modules.py b/legacy/contrib/HumanSeg/nets/seg_modules.py deleted file mode 100644 index a0794474a8..0000000000 --- a/legacy/contrib/HumanSeg/nets/seg_modules.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
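
`sigmoid_to_softmax()` in the removed `libs.py` is the trick that lets the single-channel dice/bce heads share the downstream argmax and probability code with the multi-class softmax path: the one-channel sigmoid output is expanded to a two-channel (background, foreground) map. A NumPy rendering of the same arithmetic, for illustration only (not Paddle code):

```python
import numpy as np

def sigmoid_to_softmax_np(logit):
    """NumPy version of the removed libs.sigmoid_to_softmax():
    a single-channel logit becomes a two-channel (1 - p, p) probability map,
    so downstream code stays class-count agnostic."""
    p = 1.0 / (1.0 + np.exp(-logit))               # sigmoid
    return np.concatenate([1.0 - p, p], axis=1)    # channel axis = 1 (NCHW)

logit = np.array([[[[2.0, -1.0], [0.0, 3.0]]]])    # shape (1, 1, 2, 2)
probs = sigmoid_to_softmax_np(logit)               # shape (1, 2, 2, 2)
assert np.allclose(probs.sum(axis=1), 1.0)         # channels behave like a softmax
```
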
- -import paddle.fluid as fluid -import numpy as np - - -def softmax_with_loss(logit, - label, - ignore_mask=None, - num_classes=2, - weight=None, - ignore_index=255): - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - label = fluid.layers.elementwise_min( - label, fluid.layers.assign(np.array([num_classes - 1], dtype=np.int32))) - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.reshape(logit, [-1, num_classes]) - label = fluid.layers.reshape(label, [-1, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.reshape(ignore_mask, [-1, 1]) - if weight is None: - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, label, ignore_index=ignore_index, return_softmax=True) - else: - label = fluid.layers.squeeze(label, axes=[-1]) - label_one_hot = fluid.one_hot(input=label, depth=num_classes) - if isinstance(weight, list): - assert len( - weight - ) == num_classes, "weight length must equal num of classes" - weight = fluid.layers.assign(np.array([weight], dtype='float32')) - elif isinstance(weight, str): - assert weight.lower( - ) == 'dynamic', 'if weight is string, must be dynamic!' - tmp = [] - total_num = fluid.layers.cast( - fluid.layers.shape(label)[0], 'float32') - for i in range(num_classes): - cls_pixel_num = fluid.layers.reduce_sum(label_one_hot[:, i]) - ratio = total_num / (cls_pixel_num + 1) - tmp.append(ratio) - weight = fluid.layers.concat(tmp) - weight = weight / fluid.layers.reduce_sum(weight) * num_classes - elif isinstance(weight, fluid.layers.Variable): - pass - else: - raise ValueError( - 'Expect weight is a list, string or Variable, but receive {}'. - format(type(weight))) - weight = fluid.layers.reshape(weight, [1, num_classes]) - weighted_label_one_hot = fluid.layers.elementwise_mul( - label_one_hot, weight) - probs = fluid.layers.softmax(logit) - loss = fluid.layers.cross_entropy( - probs, - weighted_label_one_hot, - soft_label=True, - ignore_index=ignore_index) - weighted_label_one_hot.stop_gradient = True - - loss = loss * ignore_mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(ignore_mask) + 0.00001) - - label.stop_gradient = True - ignore_mask.stop_gradient = True - return avg_loss - - -# to change, how to appicate ignore index and ignore mask -def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception( - "dice loss is only applicable to one channel classfication") - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.transpose(ignore_mask, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit = logit * ignore_mask - label = label * ignore_mask - reduce_dim = list(range(1, len(logit.shape))) - inse = fluid.layers.reduce_sum(logit * label, dim=reduce_dim) - dice_denominator = fluid.layers.reduce_sum( - logit, dim=reduce_dim) + fluid.layers.reduce_sum( - label, dim=reduce_dim) - dice_score = 1 - inse * 2 / (dice_denominator + epsilon) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return fluid.layers.reduce_mean(dice_score) - - -def bce_loss(logit, label, ignore_mask=None, ignore_index=255): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception("bce loss is only applicable to binary classfication") - label = fluid.layers.cast(label, 
'float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logit, label=label, ignore_index=ignore_index, - normalize=True) # or False - loss = fluid.layers.reduce_sum(loss) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return loss diff --git a/legacy/contrib/HumanSeg/nets/shufflenet_slim.py b/legacy/contrib/HumanSeg/nets/shufflenet_slim.py deleted file mode 100644 index 4ac7dc0076..0000000000 --- a/legacy/contrib/HumanSeg/nets/shufflenet_slim.py +++ /dev/null @@ -1,262 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from .libs import sigmoid_to_softmax -from .seg_modules import softmax_with_loss -from .seg_modules import dice_loss -from .seg_modules import bce_loss - - -class ShuffleSeg(object): - # def __init__(self): - # self.params = train_parameters - def __init__(self, - num_classes, - mode='train', - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255): - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. 
- format(type(class_weight))) - - self.num_classes = num_classes - self.mode = mode - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - - def _get_loss(self, logit, label, mask): - avg_loss = 0 - if not (self.use_dice_loss or self.use_bce_loss): - avg_loss += softmax_with_loss( - logit, - label, - mask, - num_classes=self.num_classes, - weight=self.class_weight, - ignore_index=self.ignore_index) - else: - if self.use_dice_loss: - avg_loss += dice_loss(logit, label, mask) - if self.use_bce_loss: - avg_loss += bce_loss( - logit, label, mask, ignore_index=self.ignore_index) - - return avg_loss - - def generate_inputs(self): - inputs = OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', shape=[None, 3, None, None], name='image') - if self.mode == 'train': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - return inputs - - def build_net(self, inputs): - if self.use_dice_loss or self.use_bce_loss: - self.num_classes = 1 - image = inputs['image'] - ## Encoder - conv1 = self.conv_bn(image, 3, 36, 2, 1) - print('encoder 1', conv1.shape) - shortcut = self.conv_bn( - input=conv1, filter_size=1, num_filters=18, stride=1, padding=0) - print('shortcut 1', shortcut.shape) - - pool = fluid.layers.pool2d( - input=conv1, - pool_size=3, - pool_type='max', - pool_stride=2, - pool_padding=1) - print('encoder 2', pool.shape) - - # Block 1 - conv = self.sfnetv2module(pool, stride=2, num_filters=72) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - print('encoder 3', conv.shape) - - # Block 2 - conv = self.sfnetv2module(conv, stride=2) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - conv = self.sfnetv2module(conv, stride=1) - print('encoder 4', conv.shape) - - ### decoder - conv = self.depthwise_separable(conv, 3, 64, 1) - shortcut_shape = fluid.layers.shape(shortcut)[2:] - conv_b = fluid.layers.resize_bilinear(conv, shortcut_shape) - concat = fluid.layers.concat([shortcut, conv_b], axis=1) - decode_conv = self.depthwise_separable(concat, 3, 64, 1) - logit = self.output_layer(decode_conv, self.num_classes) - - if self.num_classes == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if self.mode == 'train': - label = inputs['label'] - mask = label != self.ignore_index - return self._get_loss(logit, label, mask) - - else: - if self.num_classes == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = fluid.layers.softmax(logit, axis=1) - return pred, logit - - return logit - - def conv_bn(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act='relu', - use_cudnn=True): - parameter_attr = ParamAttr(learning_rate=1, initializer=MSRA()) - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - 
use_cudnn=use_cudnn, - param_attr=parameter_attr, - bias_attr=False) - return fluid.layers.batch_norm(input=conv, act=act) - - def depthwise_separable(self, input, filter_size, num_filters, stride): - num_filters1 = int(input.shape[1]) - num_groups = num_filters1 - depthwise_conv = self.conv_bn( - input=input, - filter_size=filter_size, - num_filters=int(num_filters1), - stride=stride, - padding=int(filter_size / 2), - num_groups=num_groups, - use_cudnn=False, - act=None) - - pointwise_conv = self.conv_bn( - input=depthwise_conv, - filter_size=1, - num_filters=num_filters, - stride=1, - padding=0) - return pointwise_conv - - def sfnetv2module(self, input, stride, num_filters=None): - if stride == 1: - shortcut, branch = fluid.layers.split( - input, num_or_sections=2, dim=1) - if num_filters is None: - in_channels = int(branch.shape[1]) - else: - in_channels = int(num_filters / 2) - else: - branch = input - if num_filters is None: - in_channels = int(branch.shape[1]) - else: - in_channels = int(num_filters / 2) - shortcut = self.depthwise_separable(input, 3, in_channels, stride) - branch_1x1 = self.conv_bn( - input=branch, - filter_size=1, - num_filters=int(in_channels), - stride=1, - padding=0) - branch_dw1x1 = self.depthwise_separable(branch_1x1, 3, in_channels, - stride) - output = fluid.layers.concat(input=[shortcut, branch_dw1x1], axis=1) - - # channel shuffle - # b, c, h, w = output.shape - shape = fluid.layers.shape(output) - c = output.shape[1] - b, h, w = shape[0], shape[2], shape[3] - output = fluid.layers.reshape(x=output, shape=[b, 2, in_channels, h, w]) - output = fluid.layers.transpose(x=output, perm=[0, 2, 1, 3, 4]) - output = fluid.layers.reshape(x=output, shape=[b, c, h, w]) - return output - - def output_layer(self, input, out_dim): - param_attr = fluid.param_attr.ParamAttr( - learning_rate=1., - regularizer=fluid.regularizer.L2Decay(0.), - initializer=fluid.initializer.Xavier()) - # deconv - output = fluid.layers.conv2d_transpose( - input=input, - num_filters=out_dim, - filter_size=2, - padding=0, - stride=2, - bias_attr=True, - param_attr=param_attr, - act=None) - return output diff --git a/legacy/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py b/legacy/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py deleted file mode 100644 index 0b74c5ebf4..0000000000 --- a/legacy/contrib/HumanSeg/pretrained_weights/download_pretrained_weights.py +++ /dev/null @@ -1,52 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
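The tail of `sfnetv2module` performs the ShuffleNet V2 channel shuffle: reshape the concatenated branches to (N, 2, C/2, H, W), swap the group and channel axes, and flatten back so information mixes across the two branches. A NumPy sketch of that permutation (illustrative, not tied to Paddle):

```python
import numpy as np

def channel_shuffle(x, groups=2):
    """Interleave channels across `groups`, as in ShuffleNet V2."""
    n, c, h, w = x.shape
    x = x.reshape(n, groups, c // groups, h, w)
    x = x.transpose(0, 2, 1, 3, 4)   # swap the group and channel axes
    return x.reshape(n, c, h, w)
```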
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "../../../", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - -model_urls = { - "humanseg_server_ckpt": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_ckpt.zip", - "humanseg_server_inference": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_server_inference.zip", - "humanseg_mobile_ckpt": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_ckpt.zip", - "humanseg_mobile_inference": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_inference.zip", - "humanseg_mobile_quant": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_mobile_quant.zip", - "humanseg_lite_ckpt": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_ckpt.zip", - "humanseg_lite_inference": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_inference.zip", - "humanseg_lite_quant": - "https://paddleseg.bj.bcebos.com/humanseg/models/humanseg_lite_quant.zip", -} - -if __name__ == "__main__": - for model_name, url in model_urls.items(): - download_file_and_uncompress( - url=url, - savepath=LOCAL_PATH, - extrapath=LOCAL_PATH, - extraname=model_name) - - print("Pretrained Model download success!") diff --git a/legacy/contrib/HumanSeg/quant_offline.py b/legacy/contrib/HumanSeg/quant_offline.py deleted file mode 100644 index 163b770680..0000000000 --- a/legacy/contrib/HumanSeg/quant_offline.py +++ /dev/null @@ -1,95 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
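`download_file_and_uncompress` is pulled in from the repository's test utilities; any archive listed in `model_urls` can also be fetched and unpacked with the standard library alone. A rough stand-in (helper name and save directory are placeholders):

```python
import os
import urllib.request
import zipfile

def fetch_and_unzip(url, save_dir="pretrained_weights"):
    os.makedirs(save_dir, exist_ok=True)
    archive = os.path.join(save_dir, url.rsplit("/", 1)[-1])
    urllib.request.urlretrieve(url, archive)   # download the .zip
    with zipfile.ZipFile(archive) as zf:
        zf.extractall(save_dir)                # unpack next to the archive
```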
- -import argparse -from datasets.dataset import Dataset -import transforms -import models - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg training') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for quant', - type=str, - default='output/best_model') - parser.add_argument( - '--batch_size', - dest='batch_size', - help='Mini batch size', - type=int, - default=1) - parser.add_argument( - '--batch_nums', - dest='batch_nums', - help='Batch number for quant', - type=int, - default=10) - parser.add_argument( - '--data_dir', - dest='data_dir', - help='the root directory of dataset', - type=str) - parser.add_argument( - '--quant_list', - dest='quant_list', - help= - 'Image file list for model quantization, it can be vat.txt or train.txt', - type=str, - default=None) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the quant model', - type=str, - default='./output/quant_offline') - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - return parser.parse_args() - - -def evaluate(args): - eval_transforms = transforms.Compose( - [transforms.Resize(args.image_shape), - transforms.Normalize()]) - - eval_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.quant_list, - transforms=eval_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=False) - - model = models.load_model(args.model_dir) - model.export_quant_model( - dataset=eval_dataset, - save_dir=args.save_dir, - batch_size=args.batch_size, - batch_nums=args.batch_nums) - - -if __name__ == '__main__': - args = parse_args() - - evaluate(args) diff --git a/legacy/contrib/HumanSeg/quant_online.py b/legacy/contrib/HumanSeg/quant_online.py deleted file mode 100644 index 2bf6051856..0000000000 --- a/legacy/contrib/HumanSeg/quant_online.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
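The offline-quantization script above is a thin CLI around `models.load_model(...).export_quant_model(...)`; the same flow can be driven directly from Python, for example (dataset and model paths are placeholders):

```python
import transforms
import models
from datasets.dataset import Dataset

eval_transforms = transforms.Compose(
    [transforms.Resize([192, 192]), transforms.Normalize()])

quant_data = Dataset(
    data_dir="data/mini_supervisely",            # placeholder dataset root
    file_list="data/mini_supervisely/val.txt",   # calibration list
    transforms=eval_transforms,
    num_workers="auto",
    buffer_size=100,
    parallel_method="thread",
    shuffle=False)

model = models.load_model("output/best_model")
model.export_quant_model(
    dataset=quant_data,
    save_dir="./output/quant_offline",
    batch_size=1,
    batch_nums=10)
```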
- -import argparse -from datasets.dataset import Dataset -from models import HumanSegMobile, HumanSegLite, HumanSegServer -import transforms - -MODEL_TYPE = ['HumanSegMobile', 'HumanSegLite', 'HumanSegServer'] - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg training') - parser.add_argument( - '--model_type', - dest='model_type', - help= - "Model type for traing, which is one of ('HumanSegMobile', 'HumanSegLite', 'HumanSegServer')", - type=str, - default='HumanSegMobile') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='The root directory of dataset', - type=str) - parser.add_argument( - '--train_list', - dest='train_list', - help='Train list file of dataset', - type=str) - parser.add_argument( - '--val_list', - dest='val_list', - help='Val list file of dataset', - type=str, - default=None) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the model snapshot', - type=str, - default='./output/quant_train') - parser.add_argument( - '--num_classes', - dest='num_classes', - help='Number of classes', - type=int, - default=2) - parser.add_argument( - '--num_epochs', - dest='num_epochs', - help='Number epochs for training', - type=int, - default=2) - parser.add_argument( - '--batch_size', - dest='batch_size', - help='Mini batch size', - type=int, - default=128) - parser.add_argument( - '--learning_rate', - dest='learning_rate', - help='Learning rate', - type=float, - default=0.001) - parser.add_argument( - '--pretrained_weights', - dest='pretrained_weights', - help='The model path for quant', - type=str, - default=None) - parser.add_argument( - '--save_interval_epochs', - dest='save_interval_epochs', - help='The interval epochs for save a model snapshot', - type=int, - default=1) - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - - return parser.parse_args() - - -def train(args): - train_transforms = transforms.Compose([ - transforms.RandomHorizontalFlip(), - transforms.Resize(args.image_shape), - transforms.Normalize() - ]) - - eval_transforms = transforms.Compose( - [transforms.Resize(args.image_shape), - transforms.Normalize()]) - - train_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.train_list, - transforms=train_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=True) - - eval_dataset = None - if args.val_list is not None: - eval_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.val_list, - transforms=eval_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=False) - - if args.model_type == 'HumanSegMobile': - model = HumanSegMobile(num_classes=2) - elif args.model_type == 'HumanSegLite': - model = HumanSegLite(num_classes=2) - elif args.model_type == 'HumanSegServer': - model = HumanSegServer(num_classes=2) - else: - raise ValueError( - "--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', " - "'HumanSegLite', 'HumanSegServer')".format(args.model_type)) - model.train( - num_epochs=args.num_epochs, - train_dataset=train_dataset, - train_batch_size=args.batch_size, - eval_dataset=eval_dataset, - save_interval_epochs=args.save_interval_epochs, - save_dir=args.save_dir, - pretrained_weights=args.pretrained_weights, - learning_rate=args.learning_rate, - quant=True) - - -if __name__ == '__main__': - args = parse_args() - train(args) diff --git a/legacy/contrib/HumanSeg/requirements.txt 
b/legacy/contrib/HumanSeg/requirements.txt deleted file mode 100644 index eb247d0787..0000000000 --- a/legacy/contrib/HumanSeg/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -visualdl >= 2.0.0b1 -paddleslim diff --git a/legacy/contrib/HumanSeg/train.py b/legacy/contrib/HumanSeg/train.py deleted file mode 100644 index 3f5f89f3dc..0000000000 --- a/legacy/contrib/HumanSeg/train.py +++ /dev/null @@ -1,169 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from datasets.dataset import Dataset -from models import HumanSegMobile, HumanSegLite, HumanSegServer -import transforms - -MODEL_TYPE = ['HumanSegMobile', 'HumanSegLite', 'HumanSegServer'] - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg training') - parser.add_argument( - '--model_type', - dest='model_type', - help= - "Model type for traing, which is one of ('HumanSegMobile', 'HumanSegLite', 'HumanSegServer')", - type=str, - default='HumanSegMobile') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='The root directory of dataset', - type=str) - parser.add_argument( - '--train_list', - dest='train_list', - help='Train list file of dataset', - type=str) - parser.add_argument( - '--val_list', - dest='val_list', - help='Val list file of dataset', - type=str, - default=None) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the model snapshot', - type=str, - default='./output') - parser.add_argument( - '--num_classes', - dest='num_classes', - help='Number of classes', - type=int, - default=2) - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - parser.add_argument( - '--num_epochs', - dest='num_epochs', - help='Number epochs for training', - type=int, - default=100) - parser.add_argument( - '--batch_size', - dest='batch_size', - help='Mini batch size', - type=int, - default=128) - parser.add_argument( - '--learning_rate', - dest='learning_rate', - help='Learning rate', - type=float, - default=0.01) - parser.add_argument( - '--pretrained_weights', - dest='pretrained_weights', - help='The path of pretrianed weight', - type=str, - default=None) - parser.add_argument( - '--resume_weights', - dest='resume_weights', - help='The path of resume weight', - type=str, - default=None) - parser.add_argument( - '--use_vdl', - dest='use_vdl', - help='Whether to use visualdl', - action='store_true') - parser.add_argument( - '--save_interval_epochs', - dest='save_interval_epochs', - help='The interval epochs for save a model snapshot', - type=int, - default=5) - - return parser.parse_args() - - -def train(args): - train_transforms = transforms.Compose([ - transforms.Resize(args.image_shape), - transforms.RandomHorizontalFlip(), - transforms.Normalize() - ]) - - eval_transforms = transforms.Compose( - [transforms.Resize(args.image_shape), - transforms.Normalize()]) - - 
train_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.train_list, - transforms=train_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=True) - - eval_dataset = None - if args.val_list is not None: - eval_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.val_list, - transforms=eval_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=False) - - if args.model_type == 'HumanSegMobile': - model = HumanSegMobile(num_classes=2) - elif args.model_type == 'HumanSegLite': - model = HumanSegLite(num_classes=2) - elif args.model_type == 'HumanSegServer': - model = HumanSegServer(num_classes=2) - else: - raise ValueError( - "--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', " - "'HumanSegLite', 'HumanSegServer')".format(args.model_type)) - model.train( - num_epochs=args.num_epochs, - train_dataset=train_dataset, - train_batch_size=args.batch_size, - eval_dataset=eval_dataset, - save_interval_epochs=args.save_interval_epochs, - save_dir=args.save_dir, - pretrained_weights=args.pretrained_weights, - resume_weights=args.resume_weights, - learning_rate=args.learning_rate, - use_vdl=args.use_vdl) - - -if __name__ == '__main__': - args = parse_args() - train(args) diff --git a/legacy/contrib/HumanSeg/transforms/functional.py b/legacy/contrib/HumanSeg/transforms/functional.py deleted file mode 100644 index c3f0265b0e..0000000000 --- a/legacy/contrib/HumanSeg/transforms/functional.py +++ /dev/null @@ -1,100 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
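`train()` above can likewise be called without the CLI by handing it an `argparse.Namespace` that mirrors `parse_args()`; a sketch with placeholder paths:

```python
from argparse import Namespace

args = Namespace(
    model_type="HumanSegMobile",
    data_dir="data/mini_supervisely",              # placeholder dataset root
    train_list="data/mini_supervisely/train.txt",
    val_list="data/mini_supervisely/val.txt",
    save_dir="./output",
    num_classes=2,
    image_shape=[192, 192],
    num_epochs=100,
    batch_size=128,
    learning_rate=0.01,
    pretrained_weights=None,
    resume_weights=None,
    use_vdl=False,
    save_interval_epochs=5)

# train(args)  # same effect as invoking train.py with the matching flags
```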
- -import cv2 -import numpy as np -from PIL import Image, ImageEnhance - - -def normalize(im, mean, std): - im = im.astype(np.float32, copy=False) / 255.0 - im -= mean - im /= std - return im - - -def permute(im): - im = np.transpose(im, (2, 0, 1)) - return im - - -def resize(im, target_size=608, interp=cv2.INTER_LINEAR): - if isinstance(target_size, list) or isinstance(target_size, tuple): - w = target_size[0] - h = target_size[1] - else: - w = target_size - h = target_size - im = cv2.resize(im, (w, h), interpolation=interp) - return im - - -def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): - value = max(im.shape[0], im.shape[1]) - scale = float(long_size) / float(value) - resized_width = int(round(im.shape[1] * scale)) - resized_height = int(round(im.shape[0] * scale)) - - im = cv2.resize( - im, (resized_width, resized_height), interpolation=interpolation) - return im - - -def horizontal_flip(im): - if len(im.shape) == 3: - im = im[:, ::-1, :] - elif len(im.shape) == 2: - im = im[:, ::-1] - return im - - -def vertical_flip(im): - if len(im.shape) == 3: - im = im[::-1, :, :] - elif len(im.shape) == 2: - im = im[::-1, :] - return im - - -def brightness(im, brightness_lower, brightness_upper): - brightness_delta = np.random.uniform(brightness_lower, brightness_upper) - im = ImageEnhance.Brightness(im).enhance(brightness_delta) - return im - - -def contrast(im, contrast_lower, contrast_upper): - contrast_delta = np.random.uniform(contrast_lower, contrast_upper) - im = ImageEnhance.Contrast(im).enhance(contrast_delta) - return im - - -def saturation(im, saturation_lower, saturation_upper): - saturation_delta = np.random.uniform(saturation_lower, saturation_upper) - im = ImageEnhance.Color(im).enhance(saturation_delta) - return im - - -def hue(im, hue_lower, hue_upper): - hue_delta = np.random.uniform(hue_lower, hue_upper) - im = np.array(im.convert('HSV')) - im[:, :, 0] = im[:, :, 0] + hue_delta - im = Image.fromarray(im, mode='HSV').convert('RGB') - return im - - -def rotate(im, rotate_lower, rotate_upper): - rotate_delta = np.random.uniform(rotate_lower, rotate_upper) - im = im.rotate(int(rotate_delta)) - return im diff --git a/legacy/contrib/HumanSeg/transforms/transforms.py b/legacy/contrib/HumanSeg/transforms/transforms.py deleted file mode 100644 index 261b29781f..0000000000 --- a/legacy/contrib/HumanSeg/transforms/transforms.py +++ /dev/null @@ -1,915 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
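`resize_long` in the deleted `functional.py` rescales the longer side to `long_size` and keeps the aspect ratio; a quick sanity check of the arithmetic for a 1280×720 frame:

```python
# resize_long(im, long_size=224) on a 720x1280 image:
h, w, long_size = 720, 1280, 224
scale = float(long_size) / max(h, w)                 # 224 / 1280 = 0.175
print(int(round(w * scale)), int(round(h * scale)))  # 224 126
```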
- -from .functional import * -import random -import numpy as np -from PIL import Image -import cv2 -from collections import OrderedDict - - -class Compose: - """根据数据预处理/增强算子对输入数据进行操作。 - 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - - Args: - transforms (list): 数据预处理/增强算子。 - to_rgb (bool): 是否转化为rgb通道格式 - - Raises: - TypeError: transforms不是list对象 - ValueError: transforms元素个数小于1。 - - """ - - def __init__(self, transforms, to_rgb=False): - if not isinstance(transforms, list): - raise TypeError('The transforms must be a list!') - if len(transforms) < 1: - raise ValueError('The length of transforms ' + \ - 'must be equal or larger than 1!') - self.transforms = transforms - self.to_rgb = to_rgb - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (str/np.ndarray): 图像路径/图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息,dict中的字段如下: - - shape_before_resize (tuple): 图像resize之前的大小(h, w)。 - - shape_before_padding (tuple): 图像padding之前的大小(h, w)。 - label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 - - Returns: - tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 - """ - - if im_info is None: - im_info = dict() - if isinstance(im, str): - im = cv2.imread(im).astype('float32') - if isinstance(label, str): - label = np.asarray(Image.open(label)) - if im is None: - raise ValueError('Can\'t read The image file {}!'.format(im)) - if self.to_rgb: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - - for op in self.transforms: - outputs = op(im, im_info, label) - im = outputs[0] - if len(outputs) >= 2: - im_info = outputs[1] - if len(outputs) == 3: - label = outputs[2] - return outputs - - -class RandomHorizontalFlip: - """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机水平翻转的概率。默认值为0.5。 - - """ - - def __init__(self, prob=0.5): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if random.random() < self.prob: - im = horizontal_flip(im) - if label is not None: - label = horizontal_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomVerticalFlip: - """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机垂直翻转的概率。默认值为0.1。 - """ - - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if random.random() < self.prob: - im = vertical_flip(im) - if label is not None: - label = vertical_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Resize: - """调整图像大小(resize)。 - - - 当目标大小(target_size)类型为int时,根据插值方式, - 将图像resize为[target_size, target_size]。 - - 当目标大小(target_size)类型为list或tuple时,根据插值方式, - 将图像resize为target_size。 - 注意:当插值方式为“RANDOM”时,则随机选取一种插值方式进行resize。 - - Args: - target_size (int/list/tuple): 短边目标长度。默认为608。 - interp (str): resize的插值方式,与opencv的插值方式对应,取值范围为 - ['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM']。默认为"LINEAR"。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 
插值方式不在['NEAREST', 'LINEAR', 'CUBIC', - 'AREA', 'LANCZOS4', 'RANDOM']中。 - """ - - # The interpolation mode - interp_dict = { - 'NEAREST': cv2.INTER_NEAREST, - 'LINEAR': cv2.INTER_LINEAR, - 'CUBIC': cv2.INTER_CUBIC, - 'AREA': cv2.INTER_AREA, - 'LANCZOS4': cv2.INTER_LANCZOS4 - } - - def __init__(self, target_size=512, interp='LINEAR'): - self.interp = interp - if not (interp == "RANDOM" or interp in self.interp_dict): - raise ValueError("interp should be one of {}".format( - self.interp_dict.keys())) - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise TypeError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - - self.target_size = target_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict, 可选): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info跟新字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - - Raises: - TypeError: 形参数据类型不满足需求。 - ValueError: 数据长度不匹配。 - """ - if im_info is None: - im_info = OrderedDict() - im_info['shape_before_resize'] = im.shape[:2] - if not isinstance(im, np.ndarray): - raise TypeError("Resize: image type is not numpy.") - if len(im.shape) != 3: - raise ValueError('Resize: image is not 3-dimensional.') - if self.interp == "RANDOM": - interp = random.choice(list(self.interp_dict.keys())) - else: - interp = self.interp - im = resize(im, self.target_size, self.interp_dict[interp]) - if label is not None: - label = resize(label, self.target_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeByLong: - """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - long_size (int): resize后图像的长边大小。 - """ - - def __init__(self, long_size): - self.long_size = long_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - """ - if im_info is None: - im_info = OrderedDict() - - im_info['shape_before_resize'] = im.shape[:2] - im = resize_long(im, self.long_size) - if label is not None: - label = resize_long(label, self.long_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeRangeScaling: - """对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - min_value (int): 图像长边resize后的最小值。默认值400。 - max_value (int): 图像长边resize后的最大值。默认值600。 - - Raises: - ValueError: min_value大于max_value - """ - - def __init__(self, min_value=400, max_value=600): - if min_value > max_value: - raise ValueError('min_value must be less than max_value, ' - 'but they are {} and {}.'.format( - min_value, max_value)) - self.min_value = min_value - self.max_value = max_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im 
(np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_value == self.max_value: - random_size = self.max_value - else: - random_size = int( - np.random.uniform(self.min_value, self.max_value) + 0.5) - im = resize_long(im, random_size, cv2.INTER_LINEAR) - if label is not None: - label = resize_long(label, random_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeStepScaling: - """对图像按照某一个比例resize,这个比例以scale_step_size为步长 - 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 - - Args: - min_scale_factor(float), resize最小尺度。默认值0.75。 - max_scale_factor (float), resize最大尺度。默认值1.25。 - scale_step_size (float), resize尺度范围间隔。默认值0.25。 - - Raises: - ValueError: min_scale_factor大于max_scale_factor - """ - - def __init__(self, - min_scale_factor=0.75, - max_scale_factor=1.25, - scale_step_size=0.25): - if min_scale_factor > max_scale_factor: - raise ValueError( - 'min_scale_factor must be less than max_scale_factor, ' - 'but they are {} and {}.'.format(min_scale_factor, - max_scale_factor)) - self.min_scale_factor = min_scale_factor - self.max_scale_factor = max_scale_factor - self.scale_step_size = scale_step_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_scale_factor == self.max_scale_factor: - scale_factor = self.min_scale_factor - - elif self.scale_step_size == 0: - scale_factor = np.random.uniform(self.min_scale_factor, - self.max_scale_factor) - - else: - num_steps = int((self.max_scale_factor - self.min_scale_factor) / - self.scale_step_size + 1) - scale_factors = np.linspace(self.min_scale_factor, - self.max_scale_factor, - num_steps).tolist() - np.random.shuffle(scale_factors) - scale_factor = scale_factors[0] - w = int(round(scale_factor * im.shape[1])) - h = int(round(scale_factor * im.shape[0])) - - im = resize(im, (w, h), cv2.INTER_LINEAR) - if label is not None: - label = resize(label, (w, h), cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Normalize: - """对图像进行标准化。 - 1.尺度缩放到 [0,1]。 - 2.对图像进行减均值除以标准差操作。 - - Args: - mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 - std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 - - Raises: - ValueError: mean或std不是list对象。std包含0。 - """ - - def __init__(self, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]): - self.mean = mean - self.std = std - if not (isinstance(self.mean, list) and isinstance(self.std, list)): - raise ValueError("{}: input type is invalid.".format(self)) - from functools import reduce - if reduce(lambda x, y: x * y, self.std) == 0: - raise ValueError('{}: std is invalid!'.format(self)) - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ 
- - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - im = normalize(im, mean, std) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Padding: - """对图像或标注图像进行padding,padding方向为右和下。 - 根据提供的值对图像或标注图像进行padding操作。 - - Args: - target_size (int|list|tuple): padding后图像的大小。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: target_size不是int|list|tuple。 - ValueError: target_size为list|tuple时元素个数不等于2。 - """ - - def __init__(self, - target_size, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise ValueError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - self.target_size = target_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。 - - Raises: - ValueError: 输入图像im或label的形状大于目标值 - """ - if im_info is None: - im_info = OrderedDict() - im_info['shape_before_padding'] = im.shape[:2] - - im_height, im_width = im.shape[0], im.shape[1] - if isinstance(self.target_size, int): - target_height = self.target_size - target_width = self.target_size - else: - target_height = self.target_size[1] - target_width = self.target_size[0] - pad_height = target_height - im_height - pad_width = target_width - im_width - if pad_height < 0 or pad_width < 0: - raise ValueError( - 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' - .format(im_width, im_height, target_width, target_height)) - else: - im = cv2.copyMakeBorder( - im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomPaddingCrop: - """对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。 - - Args: - crop_size (int|list|tuple): 裁剪图像大小。默认为512。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: crop_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - """ - - def __init__(self, - crop_size=512, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - if isinstance(crop_size, list) or isinstance(crop_size, tuple): - if len(crop_size) != 2: - raise ValueError( - 'when crop_size is list or tuple, it should include 2 elements, but it is {}' - .format(crop_size)) - elif not isinstance(crop_size, int): - raise TypeError( - "Type of crop_size is invalid. 
Must be Integer or List or tuple, now is {}" - .format(type(crop_size))) - self.crop_size = crop_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if isinstance(self.crop_size, int): - crop_width = self.crop_size - crop_height = self.crop_size - else: - crop_width = self.crop_size[0] - crop_height = self.crop_size[1] - - img_height = im.shape[0] - img_width = im.shape[1] - - if img_height == crop_height and img_width == crop_width: - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - else: - pad_height = max(crop_height - img_height, 0) - pad_width = max(crop_width - img_width, 0) - if (pad_height > 0 or pad_width > 0): - im = cv2.copyMakeBorder( - im, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.im_padding_value) - if label is not None: - label = cv2.copyMakeBorder( - label, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=self.label_padding_value) - img_height = im.shape[0] - img_width = im.shape[1] - - if crop_height > 0 and crop_width > 0: - h_off = np.random.randint(img_height - crop_height + 1) - w_off = np.random.randint(img_width - crop_width + 1) - - im = im[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] - if label is not None: - label = label[h_off:(crop_height + h_off), w_off:( - w_off + crop_width)] - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomBlur: - """以一定的概率对图像进行高斯模糊。 - - Args: - prob (float): 图像模糊概率。默认为0.1。 - """ - - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.prob <= 0: - n = 0 - elif self.prob >= 1: - n = 1 - else: - n = int(1.0 / self.prob) - if n > 0: - if np.random.randint(0, n) == 0: - radius = np.random.randint(3, 10) - if radius % 2 != 1: - radius = radius + 1 - if radius > 9: - radius = 9 - im = cv2.GaussianBlur(im, (radius, radius), 0, 0) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomRotation: - """对图像进行随机旋转。 - 在不超过最大旋转角度的情况下,图像进行随机旋转,当存在标注图像时,同步进行, - 并对旋转后的图像和标注图像进行相应的padding。 - - Args: - max_rotation (float): 最大旋转角度。默认为15度。 - im_padding_value (list): 图像padding的值。默认为[127.5, 127.5, 127.5]。 - label_padding_value (int): 标注图像padding的值。默认为255。 - - """ - - def __init__(self, - max_rotation=15, - im_padding_value=[127.5, 127.5, 127.5], - label_padding_value=255): - self.max_rotation = max_rotation - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, 
label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.max_rotation > 0: - (h, w) = im.shape[:2] - do_rotation = np.random.uniform(-self.max_rotation, - self.max_rotation) - pc = (w // 2, h // 2) - r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0) - cos = np.abs(r[0, 0]) - sin = np.abs(r[0, 1]) - - nw = int((h * sin) + (w * cos)) - nh = int((h * cos) + (w * sin)) - - (cx, cy) = pc - r[0, 2] += (nw / 2) - cx - r[1, 2] += (nh / 2) - cy - dsize = (nw, nh) - im = cv2.warpAffine( - im, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.im_padding_value) - label = cv2.warpAffine( - label, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.label_padding_value) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomScaleAspect: - """裁剪并resize回原始尺寸的图像和标注图像。 - 按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 - - Args: - min_scale (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。 - aspect_ratio (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。 - """ - - def __init__(self, min_scale=0.5, aspect_ratio=0.33): - self.min_scale = min_scale - self.aspect_ratio = aspect_ratio - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_scale != 0 and self.aspect_ratio != 0: - img_height = im.shape[0] - img_width = im.shape[1] - for i in range(0, 10): - area = img_height * img_width - target_area = area * np.random.uniform(self.min_scale, 1.0) - aspectRatio = np.random.uniform(self.aspect_ratio, - 1.0 / self.aspect_ratio) - - dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) - dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) - if (np.random.randint(10) < 5): - tmp = dw - dw = dh - dh = tmp - - if (dh < img_height and dw < img_width): - h1 = np.random.randint(0, img_height - dh) - w1 = np.random.randint(0, img_width - dw) - - im = im[h1:(h1 + dh), w1:(w1 + dw), :] - label = label[h1:(h1 + dh), w1:(w1 + dw)] - im = cv2.resize( - im, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - label = cv2.resize( - label, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) - break - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomDistort: - """对图像进行随机失真。 - - 1. 对变换的操作顺序进行随机化操作。 - 2. 
按照1中的顺序以一定的概率对图像进行随机像素内容变换。 - - Args: - brightness_range (float): 明亮度因子的范围。默认为0.5。 - brightness_prob (float): 随机调整明亮度的概率。默认为0.5。 - contrast_range (float): 对比度因子的范围。默认为0.5。 - contrast_prob (float): 随机调整对比度的概率。默认为0.5。 - saturation_range (float): 饱和度因子的范围。默认为0.5。 - saturation_prob (float): 随机调整饱和度的概率。默认为0.5。 - hue_range (int): 色调因子的范围。默认为18。 - hue_prob (float): 随机调整色调的概率。默认为0.5。 - """ - - def __init__(self, - brightness_range=0.5, - brightness_prob=0.5, - contrast_range=0.5, - contrast_prob=0.5, - saturation_range=0.5, - saturation_prob=0.5, - hue_range=18, - hue_prob=0.5): - self.brightness_range = brightness_range - self.brightness_prob = brightness_prob - self.contrast_range = contrast_range - self.contrast_prob = contrast_prob - self.saturation_range = saturation_range - self.saturation_prob = saturation_prob - self.hue_range = hue_range - self.hue_prob = hue_prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - brightness_lower = 1 - self.brightness_range - brightness_upper = 1 + self.brightness_range - contrast_lower = 1 - self.contrast_range - contrast_upper = 1 + self.contrast_range - saturation_lower = 1 - self.saturation_range - saturation_upper = 1 + self.saturation_range - hue_lower = -self.hue_range - hue_upper = self.hue_range - ops = [brightness, contrast, saturation, hue] - random.shuffle(ops) - params_dict = { - 'brightness': { - 'brightness_lower': brightness_lower, - 'brightness_upper': brightness_upper - }, - 'contrast': { - 'contrast_lower': contrast_lower, - 'contrast_upper': contrast_upper - }, - 'saturation': { - 'saturation_lower': saturation_lower, - 'saturation_upper': saturation_upper - }, - 'hue': { - 'hue_lower': hue_lower, - 'hue_upper': hue_upper - } - } - prob_dict = { - 'brightness': self.brightness_prob, - 'contrast': self.contrast_prob, - 'saturation': self.saturation_prob, - 'hue': self.hue_prob - } - im = im.astype('uint8') - im = Image.fromarray(im) - for id in range(4): - params = params_dict[ops[id].__name__] - prob = prob_dict[ops[id].__name__] - params['im'] = im - if np.random.uniform(0, 1) < prob: - im = ops[id](**params) - im = np.asarray(im).astype('float32') - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ArrangeSegmenter: - """获取训练/验证/预测所需的信息。 - - Args: - mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 - - Raises: - ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内 - """ - - def __init__(self, mode): - if mode not in ['train', 'eval', 'test', 'quant']: - raise ValueError( - "mode should be defined as one of ['train', 'eval', 'test', 'quant']!" 
- ) - self.mode = mode - - def __call__(self, im, im_info, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为 - 'quant'时,返回的tuple为(im,),为图像np.ndarray数据。 - """ - im = permute(im) - if self.mode == 'train' or self.mode == 'eval': - label = label[np.newaxis, :, :] - return (im, label) - elif self.mode == 'test': - return (im, im_info) - else: - return (im, ) diff --git a/legacy/contrib/HumanSeg/utils/__init__.py b/legacy/contrib/HumanSeg/utils/__init__.py deleted file mode 100644 index d196c743e8..0000000000 --- a/legacy/contrib/HumanSeg/utils/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import logging -from . import humanseg_postprocess -from .metrics import ConfusionMatrix -from .utils import * -from .post_quantization import HumanSegPostTrainingQuantization diff --git a/legacy/contrib/HumanSeg/utils/humanseg_postprocess.py b/legacy/contrib/HumanSeg/utils/humanseg_postprocess.py deleted file mode 100644 index cd4d18da65..0000000000 --- a/legacy/contrib/HumanSeg/utils/humanseg_postprocess.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
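`ArrangeSegmenter` is the final op of every pipeline: it permutes the image from HWC to CHW and shapes the outputs per mode ('train'/'eval' return (im, label) with a channel axis added to the label, 'test' returns (im, im_info), 'quant' returns (im,)). A tiny NumPy sketch of the train-mode arrangement (names are illustrative):

```python
import numpy as np

def arrange_train(im_hwc, label_hw):
    """Mirror ArrangeSegmenter('train'): image HWC -> CHW, label gains a channel axis."""
    im_chw = np.transpose(im_hwc, (2, 0, 1))
    return im_chw, label_hw[np.newaxis, :, :]

im, label = arrange_train(np.zeros((192, 192, 3), "float32"),
                          np.zeros((192, 192), "int32"))
print(im.shape, label.shape)  # (3, 192, 192) (1, 192, 192)
```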
- -import numpy as np - - -def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): - """计算光流跟踪匹配点和光流图 - 输入参数: - pre_gray: 上一帧灰度图 - cur_gray: 当前帧灰度图 - prev_cfd: 上一帧光流图 - dl_weights: 融合权重图 - disflow: 光流数据结构 - 返回值: - is_track: 光流点跟踪二值图,即是否具有光流点匹配 - track_cfd: 光流跟踪图 - """ - check_thres = 8 - h, w = pre_gray.shape[:2] - track_cfd = np.zeros_like(prev_cfd) - is_track = np.zeros_like(pre_gray) - flow_fw = disflow.calc(pre_gray, cur_gray, None) - flow_bw = disflow.calc(cur_gray, pre_gray, None) - flow_fw = np.round(flow_fw).astype(np.int) - flow_bw = np.round(flow_bw).astype(np.int) - y_list = np.array(range(h)) - x_list = np.array(range(w)) - yv, xv = np.meshgrid(y_list, x_list) - yv, xv = yv.T, xv.T - cur_x = xv + flow_fw[:, :, 0] - cur_y = yv + flow_fw[:, :, 1] - - # 超出边界不跟踪 - not_track = (cur_x < 0) + (cur_x >= w) + (cur_y < 0) + (cur_y >= h) - flow_bw[~not_track] = flow_bw[cur_y[~not_track], cur_x[~not_track]] - not_track += (np.square(flow_fw[:, :, 0] + flow_bw[:, :, 0]) + - np.square(flow_fw[:, :, 1] + flow_bw[:, :, 1])) >= check_thres - track_cfd[cur_y[~not_track], cur_x[~not_track]] = prev_cfd[~not_track] - - is_track[cur_y[~not_track], cur_x[~not_track]] = 1 - - not_flow = np.all( - np.abs(flow_fw) == 0, axis=-1) * np.all( - np.abs(flow_bw) == 0, axis=-1) - dl_weights[cur_y[not_flow], cur_x[not_flow]] = 0.05 - return track_cfd, is_track, dl_weights - - -def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): - """光流追踪图和人像分割结构融合 - 输入参数: - track_cfd: 光流追踪图 - dl_cfd: 当前帧分割结果 - dl_weights: 融合权重图 - is_track: 光流点匹配二值图 - 返回 - cur_cfd: 光流跟踪图和人像分割结果融合图 - """ - fusion_cfd = dl_cfd.copy() - is_track = is_track.astype(np.bool) - fusion_cfd[is_track] = dl_weights[is_track] * dl_cfd[is_track] + ( - 1 - dl_weights[is_track]) * track_cfd[is_track] - # 确定区域 - index_certain = ((dl_cfd > 0.9) + (dl_cfd < 0.1)) * is_track - index_less01 = (dl_weights < 0.1) * index_certain - fusion_cfd[index_less01] = 0.3 * dl_cfd[index_less01] + 0.7 * track_cfd[ - index_less01] - index_larger09 = (dl_weights >= 0.1) * index_certain - fusion_cfd[index_larger09] = 0.4 * dl_cfd[index_larger09] + 0.6 * track_cfd[ - index_larger09] - return fusion_cfd - - -def threshold_mask(img, thresh_bg, thresh_fg): - dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) - dst[np.where(dst > 1)] = 1 - dst[np.where(dst < 0)] = 0 - return dst.astype(np.float32) - - -def postprocess(cur_gray, scoremap, prev_gray, pre_cfd, disflow, is_init): - """光流优化 - Args: - cur_gray : 当前帧灰度图 - pre_gray : 前一帧灰度图 - pre_cfd :前一帧融合结果 - scoremap : 当前帧分割结果 - difflow : 光流 - is_init : 是否第一帧 - Returns: - fusion_cfd : 光流追踪图和预测结果融合图 - """ - h, w = scoremap.shape - cur_cfd = scoremap.copy() - - if is_init: - if h <= 64 or w <= 64: - disflow.setFinestScale(1) - elif h <= 160 or w <= 160: - disflow.setFinestScale(2) - else: - disflow.setFinestScale(3) - fusion_cfd = cur_cfd - else: - weights = np.ones((h, w), np.float32) * 0.3 - track_cfd, is_track, weights = human_seg_tracking( - prev_gray, cur_gray, pre_cfd, weights, disflow) - fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) - - return fusion_cfd diff --git a/legacy/contrib/HumanSeg/utils/logging.py b/legacy/contrib/HumanSeg/utils/logging.py deleted file mode 100644 index 6f0c25ff55..0000000000 --- a/legacy/contrib/HumanSeg/utils/logging.py +++ /dev/null @@ -1,47 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import os -import sys - -levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} -log_level = 2 - - -def log(level=2, message=""): - current_time = time.time() - time_array = time.localtime(current_time) - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) - if log_level >= level: - print("{} [{}]\t{}".format(current_time, levels[level], - message).encode("utf-8").decode("latin1")) - sys.stdout.flush() - - -def debug(message=""): - log(level=3, message=message) - - -def info(message=""): - log(level=2, message=message) - - -def warning(message=""): - log(level=1, message=message) - - -def error(message=""): - log(level=0, message=message) diff --git a/legacy/contrib/HumanSeg/utils/metrics.py b/legacy/contrib/HumanSeg/utils/metrics.py deleted file mode 100644 index f7bab020ab..0000000000 --- a/legacy/contrib/HumanSeg/utils/metrics.py +++ /dev/null @@ -1,145 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
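The deleted `logging.py` gates messages on a module-level `log_level` (2 keeps ERROR/WARNING/INFO and drops DEBUG) and prefixes each line with a timestamp and level tag; a usage sketch (import path as used by the other legacy HumanSeg modules):

```python
import utils.logging as logging  # same relative import the legacy HumanSeg code uses

logging.info("start evaluation")    # prints e.g. "2019-01-01 12:00:00 [INFO]\tstart evaluation"
logging.debug("per-batch details")  # suppressed while log_level == 2
logging.log_level = 3               # raise verbosity so debug() is printed too
logging.debug("now visible")
```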
- -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - - -class ConfusionMatrix(object): - """ - Confusion Matrix for segmentation evaluation - """ - - def __init__(self, num_classes=2, streaming=False): - self.confusion_matrix = np.zeros([num_classes, num_classes], - dtype='int64') - self.num_classes = num_classes - self.streaming = streaming - - def calculate(self, pred, label, ignore=None): - # If not in streaming mode, clear matrix everytime when call `calculate` - if not self.streaming: - self.zero_matrix() - - label = np.transpose(label, (0, 2, 3, 1)) - ignore = np.transpose(ignore, (0, 2, 3, 1)) - mask = np.array(ignore) == 1 - - label = np.asarray(label)[mask] - pred = np.asarray(pred)[mask] - one = np.ones_like(pred) - # Accumuate ([row=label, col=pred], 1) into sparse matrix - spm = csr_matrix((one, (label, pred)), - shape=(self.num_classes, self.num_classes)) - spm = spm.todense() - self.confusion_matrix += spm - - def zero_matrix(self): - """ Clear confusion matrix """ - self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], - dtype='int64') - - def mean_iou(self): - iou_list = [] - avg_iou = 0 - # TODO: use numpy sum axis api to simpliy - vji = np.zeros(self.num_classes, dtype=int) - vij = np.zeros(self.num_classes, dtype=int) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - for c in range(self.num_classes): - total = vji[c] + vij[c] - self.confusion_matrix[c][c] - if total == 0: - iou = 0 - else: - iou = float(self.confusion_matrix[c][c]) / total - avg_iou += iou - iou_list.append(iou) - avg_iou = float(avg_iou) / float(self.num_classes) - return np.array(iou_list), avg_iou - - def accuracy(self): - total = self.confusion_matrix.sum() - total_right = 0 - for c in range(self.num_classes): - total_right += self.confusion_matrix[c][c] - if total == 0: - avg_acc = 0 - else: - avg_acc = float(total_right) / total - - vij = np.zeros(self.num_classes, dtype=int) - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - acc_list = [] - for c in range(self.num_classes): - if vij[c] == 0: - acc = 0 - else: - acc = self.confusion_matrix[c][c] / float(vij[c]) - acc_list.append(acc) - return np.array(acc_list), avg_acc - - def kappa(self): - vji = np.zeros(self.num_classes) - vij = np.zeros(self.num_classes) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - total = self.confusion_matrix.sum() - - # avoid spillovers - # TODO: is it reasonable to hard code 10000.0? 
- total = float(total) / 10000.0 - vji = vji / 10000.0 - vij = vij / 10000.0 - - tp = 0 - tc = 0 - for c in range(self.num_classes): - tp += vji[c] * vij[c] - tc += self.confusion_matrix[c][c] - - tc = tc / 10000.0 - pe = tp / (total * total) - po = tc / total - - kappa = (po - pe) / (1 - pe) - return kappa diff --git a/legacy/contrib/HumanSeg/utils/post_quantization.py b/legacy/contrib/HumanSeg/utils/post_quantization.py deleted file mode 100644 index 6adb060920..0000000000 --- a/legacy/contrib/HumanSeg/utils/post_quantization.py +++ /dev/null @@ -1,280 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.fluid.contrib.slim.quantization.quantization_pass import QuantizationTransformPass -from paddle.fluid.contrib.slim.quantization.quantization_pass import AddQuantDequantPass -from paddle.fluid.contrib.slim.quantization.quantization_pass import _out_scale_op_list -from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization -import utils.logging as logging -import paddle.fluid as fluid -import os -import re -import numpy as np -import time - - -class HumanSegPostTrainingQuantization(PostTrainingQuantization): - def __init__(self, - executor, - dataset, - program, - inputs, - outputs, - batch_size=10, - batch_nums=None, - scope=None, - algo="KL", - quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], - is_full_quantize=False, - is_use_cache_file=False, - cache_dir="./temp_post_training"): - ''' - The class utilizes post training quantization methon to quantize the - fp32 model. It uses calibrate data to calculate the scale factor of - quantized variables, and inserts fake quant/dequant op to obtain the - quantized model. - Args: - executor(fluid.Executor): The executor to load, run and save the - quantized model. - dataset(Python Iterator): The data Reader. - program(fluid.Program): The paddle program, save the parameters for model. - inputs(dict): The input of prigram. - outputs(dict): The output of program. - batch_size(int, optional): The batch size of DataLoader. Default is 10. - batch_nums(int, optional): If batch_nums is not None, the number of - calibrate data is batch_size*batch_nums. If batch_nums is None, use - all data provided by sample_generator as calibrate data. - scope(fluid.Scope, optional): The scope of the program, use it to load - and save variables. If scope=None, get scope by global_scope(). - algo(str, optional): If algo=KL, use KL-divergenc method to - get the more precise scale factor. If algo='direct', use - abs_max methon to get the scale factor. Default is KL. - quantizable_op_type(list[str], optional): List the type of ops - that will be quantized. Default is ["conv2d", "depthwise_conv2d", - "mul"]. - is_full_quantized(bool, optional): If set is_full_quantized as True, - apply quantization to all supported quantizable op type. If set - is_full_quantized as False, only apply quantization to the op type - according to the input quantizable_op_type. 
- is_use_cache_file(bool, optional): If set is_use_cache_file as False, - all temp data will be saved in memory. If set is_use_cache_file as True, - it will save temp data to disk. When the fp32 model is complex or - the number of calibrate data is large, we should set is_use_cache_file - as True. Defalut is False. - cache_dir(str, optional): When is_use_cache_file is True, set cache_dir as - the directory for saving temp data. Default is ./temp_post_training. - Returns: - None - ''' - self._support_activation_quantize_type = [ - 'range_abs_max', 'moving_average_abs_max', 'abs_max' - ] - self._support_weight_quantize_type = ['abs_max', 'channel_wise_abs_max'] - self._support_algo_type = ['KL', 'abs_max', 'min_max'] - self._support_quantize_op_type = \ - list(set(QuantizationTransformPass._supported_quantizable_op_type + - AddQuantDequantPass._supported_quantizable_op_type)) - - # Check inputs - assert executor is not None, "The executor cannot be None." - assert batch_size > 0, "The batch_size should be greater than 0." - assert algo in self._support_algo_type, \ - "The algo should be KL, abs_max or min_max." - - self._executor = executor - self._dataset = dataset - self._batch_size = batch_size - self._batch_nums = batch_nums - self._scope = fluid.global_scope() if scope == None else scope - self._algo = algo - self._is_use_cache_file = is_use_cache_file - self._cache_dir = cache_dir - self._activation_bits = 8 - self._weight_bits = 8 - self._activation_quantize_type = 'range_abs_max' - self._weight_quantize_type = 'channel_wise_abs_max' - if self._is_use_cache_file and not os.path.exists(self._cache_dir): - os.mkdir(self._cache_dir) - - if is_full_quantize: - self._quantizable_op_type = self._support_quantize_op_type - else: - self._quantizable_op_type = quantizable_op_type - for op_type in self._quantizable_op_type: - assert op_type in self._support_quantize_op_type + \ - AddQuantDequantPass._activation_type, \ - op_type + " is not supported for quantization." - - self._place = self._executor.place - self._program = program - self._feed_list = list(inputs.values()) - self._fetch_list = list(outputs.values()) - self._data_loader = None - - self._out_scale_op_list = _out_scale_op_list - self._bit_length = 8 - self._quantized_weight_var_name = set() - self._quantized_act_var_name = set() - self._sampling_data = {} - self._quantized_var_kl_threshold = {} - self._quantized_var_min = {} - self._quantized_var_max = {} - self._quantized_var_abs_max = {} - - def quantize(self): - ''' - Quantize the fp32 model. Use calibrate data to calculate the scale factor of - quantized variables, and inserts fake quant/dequant op to obtain the - quantized model. - Args: - None - Returns: - the program of quantized model. 
- ''' - self._load_model_data() - self._collect_target_varnames() - self._set_activation_persistable() - batch_ct = 0 - for data in self._data_loader(): - batch_ct += 1 - if self._batch_nums and batch_ct >= self._batch_nums: - break - batch_id = 0 - logging.info("Start to run batch!") - for data in self._data_loader(): - start = time.time() - self._executor.run( - program=self._program, - feed=data, - fetch_list=self._fetch_list, - return_numpy=False) - if self._algo == "KL": - self._sample_data(batch_id) - else: - self._sample_threshold() - end = time.time() - logging.debug( - '[Run batch data] Batch={}/{}, time_each_batch={} s.'.format( - str(batch_id + 1), str(batch_ct), str(end - start))) - batch_id += 1 - if self._batch_nums and batch_id >= self._batch_nums: - break - logging.info("All run batch: ".format(batch_id)) - self._reset_activation_persistable() - logging.info("Calculate scale factor ...") - if self._algo == "KL": - self._calculate_kl_threshold() - logging.info("Update the program ...") - if self._algo in ["KL", "abs_max"]: - self._update_program() - else: - self._save_input_threhold() - logging.info("Save ...") - self._save_output_threshold() - logging.info("Finish quant!") - return self._program - - def save_quantized_model(self, save_model_path): - ''' - Save the quantized model to the disk. - Args: - save_model_path(str): The path to save the quantized model - Returns: - None - ''' - feed_vars_names = [var.name for var in self._feed_list] - fluid.io.save_inference_model( - dirname=save_model_path, - feeded_var_names=feed_vars_names, - target_vars=self._fetch_list, - executor=self._executor, - params_filename='__params__', - main_program=self._program) - - def _load_model_data(self): - ''' - Set data loader. - ''' - feed_vars = [fluid.framework._get_var(var.name, self._program) \ - for var in self._feed_list] - self._data_loader = fluid.io.DataLoader.from_generator( - feed_list=feed_vars, capacity=3 * self._batch_size, iterable=True) - self._data_loader.set_sample_list_generator( - self._dataset.generator(self._batch_size, drop_last=True), - places=self._place) - - def _calculate_kl_threshold(self): - ''' - Calculate the KL threshold of quantized variables. - ''' - assert self._algo == "KL", "The algo should be KL to calculate kl threshold." - ct = 1 - # Abs_max threshold for weights - for var_name in self._quantized_weight_var_name: - start = time.time() - weight_data = self._sampling_data[var_name] - weight_threshold = None - if self._weight_quantize_type == "abs_max": - weight_threshold = np.max(np.abs(weight_data)) - elif self._weight_quantize_type == "channel_wise_abs_max": - weight_threshold = [] - for i in range(weight_data.shape[0]): - abs_max_value = np.max(np.abs(weight_data[i])) - weight_threshold.append(abs_max_value) - self._quantized_var_kl_threshold[var_name] = weight_threshold - end = time.time() - logging.debug( - '[Calculate weight] Weight_id={}/{}, time_each_weight={} s.'. 
- format( - str(ct), str(len(self._quantized_weight_var_name)), - str(end - start))) - ct += 1 - - ct = 1 - # KL threshold for activations - if self._is_use_cache_file: - for var_name in self._quantized_act_var_name: - start = time.time() - sampling_data = [] - filenames = [f for f in os.listdir(self._cache_dir) \ - if re.match(var_name + '_[0-9]+.npy', f)] - for filename in filenames: - file_path = os.path.join(self._cache_dir, filename) - sampling_data.append(np.load(file_path)) - os.remove(file_path) - sampling_data = np.concatenate(sampling_data) - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(np.abs(sampling_data)) - end = time.time() - logging.debug( - '[Calculate activation] Activation_id={}/{}, time_each_activation={} s.' - .format( - str(ct), str(len(self._quantized_act_var_name)), - str(end - start))) - ct += 1 - else: - for var_name in self._quantized_act_var_name: - start = time.time() - self._sampling_data[var_name] = np.concatenate( - self._sampling_data[var_name]) - self._quantized_var_kl_threshold[var_name] = \ - self._get_kl_scaling_factor(np.abs(self._sampling_data[var_name])) - end = time.time() - logging.debug( - '[Calculate activation] Activation_id={}/{}, time_each_activation={} s.' - .format( - str(ct), str(len(self._quantized_act_var_name)), - str(end - start))) - ct += 1 diff --git a/legacy/contrib/HumanSeg/utils/utils.py b/legacy/contrib/HumanSeg/utils/utils.py deleted file mode 100644 index 435374aea2..0000000000 --- a/legacy/contrib/HumanSeg/utils/utils.py +++ /dev/null @@ -1,275 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import time -import os -import os.path as osp -import numpy as np -import six -import yaml -import math -import cv2 -from . 
import logging - - -def seconds_to_hms(seconds): - h = math.floor(seconds / 3600) - m = math.floor((seconds - h * 3600) / 60) - s = int(seconds - h * 3600 - m * 60) - hms_str = "{}:{}:{}".format(h, m, s) - return hms_str - - -def setting_environ_flags(): - if 'FLAGS_eager_delete_tensor_gb' not in os.environ: - os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0' - if 'FLAGS_allocator_strategy' not in os.environ: - os.environ['FLAGS_allocator_strategy'] = 'auto_growth' - if "CUDA_VISIBLE_DEVICES" in os.environ: - if os.environ["CUDA_VISIBLE_DEVICES"].count("-1") > 0: - os.environ["CUDA_VISIBLE_DEVICES"] = "" - - -def get_environ_info(): - setting_environ_flags() - import paddle.fluid as fluid - info = dict() - info['place'] = 'cpu' - info['num'] = int(os.environ.get('CPU_NUM', 1)) - if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "": - if hasattr(fluid.core, 'get_cuda_device_count'): - gpu_num = 0 - try: - gpu_num = fluid.core.get_cuda_device_count() - except: - os.environ['CUDA_VISIBLE_DEVICES'] = '' - pass - if gpu_num > 0: - info['place'] = 'cuda' - info['num'] = fluid.core.get_cuda_device_count() - return info - - -def parse_param_file(param_file, return_shape=True): - from paddle.fluid.proto.framework_pb2 import VarType - f = open(param_file, 'rb') - version = np.fromstring(f.read(4), dtype='int32') - lod_level = np.fromstring(f.read(8), dtype='int64') - for i in range(int(lod_level)): - _size = np.fromstring(f.read(8), dtype='int64') - _ = f.read(_size) - version = np.fromstring(f.read(4), dtype='int32') - tensor_desc = VarType.TensorDesc() - tensor_desc_size = np.fromstring(f.read(4), dtype='int32') - tensor_desc.ParseFromString(f.read(int(tensor_desc_size))) - tensor_shape = tuple(tensor_desc.dims) - if return_shape: - f.close() - return tuple(tensor_desc.dims) - if tensor_desc.data_type != 5: - raise Exception( - "Unexpected data type while parse {}".format(param_file)) - data_size = 4 - for i in range(len(tensor_shape)): - data_size *= tensor_shape[i] - weight = np.fromstring(f.read(data_size), dtype='float32') - f.close() - return np.reshape(weight, tensor_shape) - - -def fuse_bn_weights(exe, main_prog, weights_dir): - import paddle.fluid as fluid - logging.info("Try to fuse weights of batch_norm...") - bn_vars = list() - for block in main_prog.blocks: - ops = list(block.ops) - for op in ops: - if op.type == 'affine_channel': - scale_name = op.input('Scale')[0] - bias_name = op.input('Bias')[0] - prefix = scale_name[:-5] - mean_name = prefix + 'mean' - variance_name = prefix + 'variance' - if not osp.exists(osp.join( - weights_dir, mean_name)) or not osp.exists( - osp.join(weights_dir, variance_name)): - logging.info( - "There's no batch_norm weight found to fuse, skip fuse_bn." 
- ) - return - - bias = block.var(bias_name) - pretrained_shape = parse_param_file( - osp.join(weights_dir, bias_name)) - actual_shape = tuple(bias.shape) - if pretrained_shape != actual_shape: - continue - bn_vars.append( - [scale_name, bias_name, mean_name, variance_name]) - eps = 1e-5 - for names in bn_vars: - scale_name, bias_name, mean_name, variance_name = names - scale = parse_param_file( - osp.join(weights_dir, scale_name), return_shape=False) - bias = parse_param_file( - osp.join(weights_dir, bias_name), return_shape=False) - mean = parse_param_file( - osp.join(weights_dir, mean_name), return_shape=False) - variance = parse_param_file( - osp.join(weights_dir, variance_name), return_shape=False) - bn_std = np.sqrt(np.add(variance, eps)) - new_scale = np.float32(np.divide(scale, bn_std)) - new_bias = bias - mean * new_scale - scale_tensor = fluid.global_scope().find_var(scale_name).get_tensor() - bias_tensor = fluid.global_scope().find_var(bias_name).get_tensor() - scale_tensor.set(new_scale, exe.place) - bias_tensor.set(new_bias, exe.place) - if len(bn_vars) == 0: - logging.info( - "There's no batch_norm weight found to fuse, skip fuse_bn.") - else: - logging.info("There's {} batch_norm ops been fused.".format( - len(bn_vars))) - - -def load_pdparams(exe, main_prog, model_dir): - import paddle.fluid as fluid - from paddle.fluid.proto.framework_pb2 import VarType - from paddle.fluid.framework import Program - - vars_to_load = list() - import pickle - with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: - params_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - unused_vars = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if var.name not in params_dict: - raise Exception("{} is not in saved model".format(var.name)) - if var.shape != params_dict[var.name].shape: - unused_vars.append(var.name) - logging.warning( - "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})" - .format(var.name, params_dict[var.name].shape, var.shape)) - continue - vars_to_load.append(var) - logging.debug("Weight {} will be load".format(var.name)) - for var_name in unused_vars: - del params_dict[var_name] - fluid.io.set_program_state(main_prog, params_dict) - - if len(vars_to_load) == 0: - logging.warning( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" 
- ) - else: - logging.info("There are {} varaibles in {} are loaded.".format( - len(vars_to_load), model_dir)) - - -def load_pretrained_weights(exe, main_prog, weights_dir, fuse_bn=False): - if not osp.exists(weights_dir): - raise Exception("Path {} not exists.".format(weights_dir)) - if osp.exists(osp.join(weights_dir, "model.pdparams")): - return load_pdparams(exe, main_prog, weights_dir) - import paddle.fluid as fluid - vars_to_load = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if not osp.exists(osp.join(weights_dir, var.name)): - logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".format( - weights_dir, var.name)) - continue - pretrained_shape = parse_param_file(osp.join(weights_dir, var.name)) - actual_shape = tuple(var.shape) - if pretrained_shape != actual_shape: - logging.warning( - "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})" - .format(weights_dir, var.name, pretrained_shape, actual_shape)) - continue - vars_to_load.append(var) - logging.debug("Weight {} will be load".format(var.name)) - - params_dict = fluid.io.load_program_state( - weights_dir, var_list=vars_to_load) - fluid.io.set_program_state(main_prog, params_dict) - if len(vars_to_load) == 0: - logging.warning( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" - ) - else: - logging.info("There are {} varaibles in {} are loaded.".format( - len(vars_to_load), weights_dir)) - if fuse_bn: - fuse_bn_weights(exe, main_prog, weights_dir) - - -def visualize(image, result, save_dir=None, weight=0.6): - """ - Convert segment result to color image, and save added image. - Args: - image: the path of origin image - result: the predict result of image - save_dir: the directory for saving visual image - weight: the image weight of visual image, and the result weight is (1 - weight) - """ - label_map = result['label_map'] - color_map = get_color_map_list(256) - color_map = np.array(color_map).astype("uint8") - # Use OpenCV LUT for color mapping - c1 = cv2.LUT(label_map, color_map[:, 0]) - c2 = cv2.LUT(label_map, color_map[:, 1]) - c3 = cv2.LUT(label_map, color_map[:, 2]) - pseudo_img = np.dstack((c1, c2, c3)) - - im = cv2.imread(image) - vis_result = cv2.addWeighted(im, weight, pseudo_img, 1 - weight, 0) - - if save_dir is not None: - if not os.path.exists(save_dir): - os.makedirs(save_dir) - image_name = os.path.split(image)[-1] - out_path = os.path.join(save_dir, image_name) - cv2.imwrite(out_path, vis_result) - else: - return vis_result - - -def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. - Args: - num_classes: Number of classes - Returns: - The color map - """ - num_classes += 1 - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)] - color_map = color_map[1:] - return color_map diff --git a/legacy/contrib/HumanSeg/val.py b/legacy/contrib/HumanSeg/val.py deleted file mode 100644 index e671879f5f..0000000000 --- a/legacy/contrib/HumanSeg/val.py +++ /dev/null @@ -1,78 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from datasets.dataset import Dataset -import transforms -import models - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg training') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for evaluating', - type=str, - default='output/best_model') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='The root directory of dataset', - type=str) - parser.add_argument( - '--val_list', - dest='val_list', - help='Val list file of dataset', - type=str, - default=None) - parser.add_argument( - '--batch_size', - dest='batch_size', - help='Mini batch size', - type=int, - default=128) - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - return parser.parse_args() - - -def evaluate(args): - eval_transforms = transforms.Compose( - [transforms.Resize(args.image_shape), - transforms.Normalize()]) - - eval_dataset = Dataset( - data_dir=args.data_dir, - file_list=args.val_list, - transforms=eval_transforms, - num_workers='auto', - buffer_size=100, - parallel_method='thread', - shuffle=False) - - model = models.load_model(args.model_dir) - model.evaluate(eval_dataset, args.batch_size) - - -if __name__ == '__main__': - args = parse_args() - - evaluate(args) diff --git a/legacy/contrib/HumanSeg/video_infer.py b/legacy/contrib/HumanSeg/video_infer.py deleted file mode 100644 index e69db69661..0000000000 --- a/legacy/contrib/HumanSeg/video_infer.py +++ /dev/null @@ -1,179 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
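The script deleted below runs per-frame human segmentation, stabilises the score map with DIS optical flow, and composites a matting result onto a white background. A condensed sketch of its per-frame loop; `predict_fn` stands in for the model forward pass, and `postprocess`/`threshold_mask` are the helpers from the `utils/humanseg_postprocess.py` file deleted earlier in this diff:

```python
import cv2
import numpy as np
# These two helpers are defined in the deleted utils/humanseg_postprocess.py above.
from utils.humanseg_postprocess import postprocess, threshold_mask

def run_stream(cap, predict_fn, size=(192, 192)):
    w, h = size
    disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    prev_gray = np.zeros((h, w), np.uint8)
    prev_cfd = np.zeros((h, w), np.float32)
    is_init = True
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        score = predict_fn(frame)  # HxW foreground score in [0, 255], already at (h, w)
        gray = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), (w, h))
        cfd = postprocess(gray, score, prev_gray, prev_cfd, disflow, is_init)
        prev_gray, prev_cfd, is_init = gray.copy(), cfd.copy(), False
        alpha = threshold_mask(cv2.GaussianBlur(cfd, (3, 3), 0),
                               thresh_bg=0.2, thresh_fg=0.8)
        yield alpha  # per-pixel matting weight in [0, 1]
```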
- -import argparse -import os -import os.path as osp -import cv2 -import numpy as np - -from utils.humanseg_postprocess import postprocess, threshold_mask -import models -import transforms - - -def parse_args(): - parser = argparse.ArgumentParser(description='HumanSeg inference for video') - parser.add_argument( - '--model_dir', - dest='model_dir', - help='Model path for inference', - type=str) - parser.add_argument( - '--video_path', - dest='video_path', - help= - 'Video path for inference, camera will be used if the path not existing', - type=str, - default=None) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='The directory for saving the inference results', - type=str, - default='./output') - parser.add_argument( - "--image_shape", - dest="image_shape", - help="The image shape for net inputs.", - nargs=2, - default=[192, 192], - type=int) - - return parser.parse_args() - - -def predict(img, model, test_transforms): - model.arrange_transform(transforms=test_transforms, mode='test') - img, im_info = test_transforms(img) - img = np.expand_dims(img, axis=0) - result = model.exe.run( - model.test_prog, - feed={'image': img}, - fetch_list=list(model.test_outputs.values())) - score_map = result[1] - score_map = np.squeeze(score_map, axis=0) - score_map = np.transpose(score_map, (1, 2, 0)) - return score_map, im_info - - -def recover(img, im_info): - keys = list(im_info.keys()) - for k in keys[::-1]: - if k == 'shape_before_resize': - h, w = im_info[k][0], im_info[k][1] - img = cv2.resize(img, (w, h), cv2.INTER_LINEAR) - elif k == 'shape_before_padding': - h, w = im_info[k][0], im_info[k][1] - img = img[0:h, 0:w] - return img - - -def video_infer(args): - resize_h = args.image_shape[1] - resize_w = args.image_shape[0] - - test_transforms = transforms.Compose( - [transforms.Resize((resize_w, resize_h)), - transforms.Normalize()]) - model = models.load_model(args.model_dir) - if not args.video_path: - cap = cv2.VideoCapture(0) - else: - cap = cv2.VideoCapture(args.video_path) - if not cap.isOpened(): - raise IOError("Error opening video stream or file, " - "--video_path whether existing: {}" - " or camera whether working".format(args.video_path)) - return - - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - - disflow = cv2.DISOpticalFlow_create(cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) - prev_gray = np.zeros((resize_h, resize_w), np.uint8) - prev_cfd = np.zeros((resize_h, resize_w), np.float32) - is_init = True - - fps = cap.get(cv2.CAP_PROP_FPS) - if args.video_path: - print('Please wait. 
It is computing......') - # 用于保存预测结果视频 - if not osp.exists(args.save_dir): - os.makedirs(args.save_dir) - out = cv2.VideoWriter( - osp.join(args.save_dir, 'result.avi'), - cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) - # 开始获取视频帧 - while cap.isOpened(): - ret, frame = cap.read() - if ret: - score_map, im_info = predict(frame, model, test_transforms) - cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) - score_map = 255 * score_map[:, :, 1] - optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \ - disflow, is_init) - prev_gray = cur_gray.copy() - prev_cfd = optflow_map.copy() - is_init = False - optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) - optflow_map = threshold_mask( - optflow_map, thresh_bg=0.2, thresh_fg=0.8) - img_matting = np.repeat( - optflow_map[:, :, np.newaxis], 3, axis=2) - img_matting = recover(img_matting, im_info) - bg_im = np.ones_like(img_matting) * 255 - comb = (img_matting * frame + (1 - img_matting) * bg_im).astype( - np.uint8) - out.write(comb) - else: - break - cap.release() - out.release() - - else: - while cap.isOpened(): - ret, frame = cap.read() - if ret: - score_map, im_info = predict(frame, model, test_transforms) - cur_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - cur_gray = cv2.resize(cur_gray, (resize_w, resize_h)) - score_map = 255 * score_map[:, :, 1] - optflow_map = postprocess(cur_gray, score_map, prev_gray, prev_cfd, \ - disflow, is_init) - prev_gray = cur_gray.copy() - prev_cfd = optflow_map.copy() - is_init = False - optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) - optflow_map = threshold_mask( - optflow_map, thresh_bg=0.2, thresh_fg=0.8) - img_matting = np.repeat( - optflow_map[:, :, np.newaxis], 3, axis=2) - img_matting = recover(img_matting, im_info) - bg_im = np.ones_like(img_matting) * 255 - comb = (img_matting * frame + (1 - img_matting) * bg_im).astype( - np.uint8) - cv2.imshow('HumanSegmentation', comb) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - else: - break - cap.release() - - -if __name__ == "__main__": - args = parse_args() - video_infer(args) diff --git a/legacy/contrib/LaneNet/README.md b/legacy/contrib/LaneNet/README.md deleted file mode 100644 index 7150dc1da7..0000000000 --- a/legacy/contrib/LaneNet/README.md +++ /dev/null @@ -1,136 +0,0 @@ -# LaneNet 模型训练教程 - -* 本教程旨在介绍如何通过使用PaddleSeg进行车道线检测 - -* 在阅读本教程前,请确保您已经了解过PaddleSeg的[快速入门](../README.md#快速入门)和[基础功能](../README.md#基础功能)等章节,以便对PaddleSeg有一定的了解 - -## 环境依赖 - -* PaddlePaddle >= 1.7.0 或develop版本 -* Python 3.5+ - -通过以下命令安装python包依赖,请确保在该分支上至少执行过一次以下命令 -```shell -$ pip install -r requirements.txt -``` - -## 一. 准备待训练数据 - -我们提前准备好了一份处理好的数据集,通过以下代码进行下载,该数据集由图森车道线检测数据集转换而来,你也可以在这个[页面](https://github.com/TuSimple/tusimple-benchmark/issues/3)下载原始数据集。 - -```shell -python dataset/download_tusimple.py -``` - -数据目录结构 -``` -LaneNet -|-- dataset - |-- tusimple_lane_detection - |-- training - |-- gt_binary_image - |-- gt_image - |-- gt_instance_image - |-- train_part.txt - |-- val_part.txt -``` -## 二. 下载预训练模型 - -下载[vgg预训练模型](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar),放在```pretrained_models```文件夹下。 - - -## 三. 准备配置 - -接着我们需要确定相关配置,从本教程的角度,配置分为三部分: - -* 数据集 - * 训练集主目录 - * 训练集文件列表 - * 测试集文件列表 - * 评估集文件列表 -* 预训练模型 - * 预训练模型名称 - * 预训练模型的backbone网络 - * 预训练模型路径 -* 其他 - * 学习率 - * Batch大小 - * ... 
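The train/val file lists referenced above are plain text, one sample per line, with the columns joined by `DATASET.SEPARATOR` (a single space in this tutorial). The exact column layout is defined by the LaneNet reader, so the line below is only a hypothetical illustration that follows the directory structure shown earlier:

```
training/gt_image/0000.png training/gt_binary_image/0000.png training/gt_instance_image/0000.png
```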
- -在三者中,预训练模型的配置尤为重要,如果模型或者BACKBONE配置错误,会导致预训练的参数没有加载,进而影响收敛速度。预训练模型相关的配置如第二步所展示。 - -数据集的配置和数据路径有关,在本教程中,数据存放在`dataset/tusimple_lane_detection`中 - -其他配置则根据数据集和机器环境的情况进行调节,最终我们保存一个如下内容的yaml配置文件,存放路径为**configs/lanenet.yaml** - -```yaml -# 数据集配置 -DATASET: - DATA_DIR: "./dataset/tusimple_lane_detection" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/tusimple_lane_detection/training/val_part.txt" - TRAIN_FILE_LIST: "./dataset/tusimple_lane_detection/training/train_part.txt" - VAL_FILE_LIST: "./dataset/tusimple_lane_detection/training/val_part.txt" - SEPARATOR: " " - -# 预训练模型配置 -MODEL: - MODEL_NAME: "lanenet" - -# 其他配置 -EVAL_CROP_SIZE: (512, 256) -TRAIN_CROP_SIZE: (512, 256) -AUG: - AUG_METHOD: u"unpadding" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (512, 256) # (width, height), for unpadding - MIRROR: False - RICH_CROP: - ENABLE: False -BATCH_SIZE: 4 -TEST: - TEST_MODEL: "./saved_model/lanenet/final/" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/lanenet/" - PRETRAINED_MODEL_DIR: "./pretrained_models/VGG16_pretrained" - SNAPSHOT_EPOCH: 5 -SOLVER: - NUM_EPOCHS: 100 - LR: 0.0005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - WEIGHT_DECAY: 0.001 -``` - - -## 五. 开始训练 - -使用下述命令启动训练 - -```shell -CUDA_VISIBLE_DEVICES=0 python -u train.py --cfg configs/lanenet.yaml --use_gpu --do_eval -``` - -## 六. 进行评估 - -模型训练完成,使用下述命令启动评估 - -```shell -CUDA_VISIBLE_DEVICES=0 python -u eval.py --use_gpu --cfg configs/lanenet.yaml -``` - -## 七. 可视化 -需要先下载一个车前视角和鸟瞰图视角转换所需文件,点击[链接](https://paddleseg.bj.bcebos.com/resources/tusimple_ipm_remap.tar),下载后放在```./utils```下。同时我们提供了一个训练好的模型,点击[链接](https://paddleseg.bj.bcebos.com/models/lanenet_vgg_tusimple.tar),下载后放在```./pretrained_models/```下,使用如下命令进行可视化 -```shell -CUDA_VISIBLE_DEVICES=0 python -u ./vis.py --cfg configs/lanenet.yaml --use_gpu --vis_dir vis_result \ -TEST.TEST_MODEL pretrained_models/LaneNet_vgg_tusimple/ -``` - -可视化结果示例: - - 预测结果:
- ![](imgs/0005_pred_lane.png) - 分割结果:
- ![](imgs/0005_pred_binary.png)
- 车道线实例预测结果:
- ![](imgs/0005_pred_instance.png) diff --git a/legacy/contrib/LaneNet/configs/lanenet.yaml b/legacy/contrib/LaneNet/configs/lanenet.yaml deleted file mode 100644 index bdf669a1f5..0000000000 --- a/legacy/contrib/LaneNet/configs/lanenet.yaml +++ /dev/null @@ -1,51 +0,0 @@ -EVAL_CROP_SIZE: (512, 256) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (512, 256) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: u"unpadding" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (512, 256) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: False - RICH_CROP: - ENABLE: False - -BATCH_SIZE: 4 - -DATALOADER: - BUF_SIZE: 256 - NUM_WORKERS: 4 -DATASET: - DATA_DIR: "./dataset/tusimple_lane_detection" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 2 - TEST_FILE_LIST: "./dataset/tusimple_lane_detection/training/val_part.txt" - TEST_TOTAL_IMAGES: 362 - TRAIN_FILE_LIST: "./dataset/tusimple_lane_detection/training/train_part.txt" - TRAIN_TOTAL_IMAGES: 3264 - VAL_FILE_LIST: "./dataset/tusimple_lane_detection/training/val_part.txt" - VAL_TOTAL_IMAGES: 362 - SEPARATOR: " " - IGNORE_INDEX: 255 - -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - MODEL_NAME: "lanenet" - DEFAULT_NORM_TYPE: "bn" -TEST: - TEST_MODEL: "./saved_model/lanenet/final/" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/lanenet/" - PRETRAINED_MODEL_DIR: "./pretrained_models/VGG16_pretrained" - SNAPSHOT_EPOCH: 1 -SOLVER: - NUM_EPOCHS: 100 - LR: 0.0005 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - WEIGHT_DECAY: 0.001 diff --git a/legacy/contrib/LaneNet/data_aug.py b/legacy/contrib/LaneNet/data_aug.py deleted file mode 100644 index 13f8dbfd72..0000000000 --- a/legacy/contrib/LaneNet/data_aug.py +++ /dev/null @@ -1,85 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
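The augmentation module deleted below reuses pdseg's resize policies (`unpadding`, `stepscaling`, `rangescaling`) and additionally resizes the instance-label map alongside the image and the binary label. A minimal usage sketch under the `unpadding` policy configured in the yaml above (the arrays are placeholders):

```python
import numpy as np
# resize() is the helper defined in the deleted data_aug.py below; with
# AUG.AUG_METHOD == 'unpadding' everything is mapped to AUG.FIX_RESIZE_SIZE (512, 256).
img = np.zeros((720, 1280, 3), np.uint8)        # input frame (H, W, C)
grt = np.zeros((720, 1280), np.uint8)           # binary lane mask
grt_instance = np.zeros((720, 1280), np.uint8)  # per-lane instance ids
img, grt, grt_instance = resize(img, grt, grt_instance)
print(img.shape, grt.shape)                     # (256, 512, 3) (256, 512)
```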
- -from __future__ import print_function -import cv2 -import numpy as np -from utils.config import cfg -from models.model_builder import ModelPhase -from pdseg.data_aug import get_random_scale, randomly_scale_image_and_label, random_rotation, \ - rand_scale_aspect, hsv_color_jitter, rand_crop - - -def resize(img, grt=None, grt_instance=None, mode=ModelPhase.TRAIN): - """ - 改变图像及标签图像尺寸 - AUG.AUG_METHOD为unpadding,所有模式均直接resize到AUG.FIX_RESIZE_SIZE的尺寸 - AUG.AUG_METHOD为stepscaling, 按比例resize,训练时比例范围AUG.MIN_SCALE_FACTOR到AUG.MAX_SCALE_FACTOR,间隔为AUG.SCALE_STEP_SIZE,其他模式返回原图 - AUG.AUG_METHOD为rangescaling,长边对齐,短边按比例变化,训练时长边对齐范围AUG.MIN_RESIZE_VALUE到AUG.MAX_RESIZE_VALUE,其他模式长边对齐AUG.INF_RESIZE_VALUE - - Args: - img(numpy.ndarray): 输入图像 - grt(numpy.ndarray): 标签图像,默认为None - mode(string): 模式, 默认训练模式,即ModelPhase.TRAIN - - Returns: - resize后的图像和标签图 - - """ - - if cfg.AUG.AUG_METHOD == 'unpadding': - target_size = cfg.AUG.FIX_RESIZE_SIZE - img = cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR) - if grt is not None: - grt = cv2.resize(grt, target_size, interpolation=cv2.INTER_NEAREST) - if grt_instance is not None: - grt_instance = cv2.resize( - grt_instance, target_size, interpolation=cv2.INTER_NEAREST) - elif cfg.AUG.AUG_METHOD == 'stepscaling': - if mode == ModelPhase.TRAIN: - min_scale_factor = cfg.AUG.MIN_SCALE_FACTOR - max_scale_factor = cfg.AUG.MAX_SCALE_FACTOR - step_size = cfg.AUG.SCALE_STEP_SIZE - scale_factor = get_random_scale(min_scale_factor, max_scale_factor, - step_size) - img, grt = randomly_scale_image_and_label( - img, grt, scale=scale_factor) - elif cfg.AUG.AUG_METHOD == 'rangescaling': - min_resize_value = cfg.AUG.MIN_RESIZE_VALUE - max_resize_value = cfg.AUG.MAX_RESIZE_VALUE - if mode == ModelPhase.TRAIN: - if min_resize_value == max_resize_value: - random_size = min_resize_value - else: - random_size = int( - np.random.uniform(min_resize_value, max_resize_value) + 0.5) - else: - random_size = cfg.AUG.INF_RESIZE_VALUE - - value = max(img.shape[0], img.shape[1]) - scale = float(random_size) / float(value) - img = cv2.resize( - img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) - if grt is not None: - grt = cv2.resize( - grt, (0, 0), - fx=scale, - fy=scale, - interpolation=cv2.INTER_NEAREST) - else: - raise Exception("Unexpect data augmention method: {}".format( - cfg.AUG.AUG_METHOD)) - - return img, grt, grt_instance diff --git a/legacy/contrib/LaneNet/dataset/download_tusimple.py b/legacy/contrib/LaneNet/dataset/download_tusimple.py deleted file mode 100644 index 1f082f3876..0000000000 --- a/legacy/contrib/LaneNet/dataset/download_tusimple.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "../../../", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_tusimple_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/tusimple_lane_detection.tar" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_tusimple_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/contrib/LaneNet/eval.py b/legacy/contrib/LaneNet/eval.py deleted file mode 100644 index e08d371f64..0000000000 --- a/legacy/contrib/LaneNet/eval.py +++ /dev/null @@ -1,189 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(os.path.split(cur_path)[0])[0] -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -SEG_PATH = os.path.join(LOCAL_PATH, "../../../") -sys.path.append(SEG_PATH) -sys.path.append(root_path) - -import time -import argparse -import functools -import pprint -import cv2 -import numpy as np -import paddle -import paddle.fluid as fluid - -from utils.config import cfg -from pdseg.utils.timer import Timer, calculate_eta -from models.model_builder import build_model -from models.model_builder import ModelPhase -from reader import LaneNetDataset - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddleSeg model evalution') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess IO or not', - action='store_true', - default=False) - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): - np.set_printoptions(precision=5, suppress=True) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - - dataset = LaneNetDataset( - file_list=cfg.DATASET.VAL_FILE_LIST, - mode=ModelPhase.TRAIN, - shuffle=True, - data_dir=cfg.DATASET.DATA_DIR) - - def data_generator(): - #TODO: check is batch reader compatitable with Windows - if use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - 
else: - data_gen = dataset.generator() - - for b in data_gen: - yield b - - data_loader, pred, grts, masks, accuracy, fp, fn = build_model( - test_prog, startup_prog, phase=ModelPhase.EVAL) - - data_loader.set_sample_generator( - data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) - - # Get device environment - places = fluid.cuda_places() if use_gpu else fluid.cpu_places() - place = places[0] - dev_count = len(places) - print("#Device count: {}".format(dev_count)) - - exe = fluid.Executor(place) - exe.run(startup_prog) - - test_prog = test_prog.clone(for_test=True) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - if ckpt_dir is not None: - print('load test model:', ckpt_dir) - try: - fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) - except: - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - # Use streaming confusion matrix to calculate mean_iou - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - fetch_list = [ - pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name - ] - num_images = 0 - step = 0 - avg_acc = 0.0 - avg_fp = 0.0 - avg_fn = 0.0 - # cur_images = 0 - all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 - timer = Timer() - timer.start() - data_loader.start() - while True: - try: - step += 1 - pred, grts, masks, out_acc, out_fp, out_fn = exe.run( - test_prog, fetch_list=fetch_list, return_numpy=True) - - avg_acc += np.mean(out_acc) * pred.shape[0] - avg_fp += np.mean(out_fp) * pred.shape[0] - avg_fn += np.mean(out_fn) * pred.shape[0] - num_images += pred.shape[0] - - speed = 1.0 / timer.elapsed_time() - - print( - "[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}" - .format(step, avg_acc / num_images, avg_fp / num_images, - avg_fn / num_images, speed, - calculate_eta(all_step - step, speed))) - - timer.restart() - sys.stdout.flush() - except fluid.core.EOFException: - break - - print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format( - num_images, avg_acc / num_images, avg_fp / num_images, - avg_fn / num_images)) - - return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images - - -def main(): - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - evaluate(cfg, **args.__dict__) - - -if __name__ == '__main__': - main() diff --git a/legacy/contrib/LaneNet/imgs/0005_pred_binary.png b/legacy/contrib/LaneNet/imgs/0005_pred_binary.png deleted file mode 100644 index 77f66b2510..0000000000 Binary files a/legacy/contrib/LaneNet/imgs/0005_pred_binary.png and /dev/null differ diff --git a/legacy/contrib/LaneNet/imgs/0005_pred_instance.png b/legacy/contrib/LaneNet/imgs/0005_pred_instance.png deleted file mode 100644 index ec99b30e49..0000000000 Binary files a/legacy/contrib/LaneNet/imgs/0005_pred_instance.png and /dev/null differ diff --git a/legacy/contrib/LaneNet/imgs/0005_pred_lane.png b/legacy/contrib/LaneNet/imgs/0005_pred_lane.png deleted file mode 100644 index 18c656f734..0000000000 Binary files a/legacy/contrib/LaneNet/imgs/0005_pred_lane.png and /dev/null differ diff --git a/legacy/contrib/LaneNet/loss.py b/legacy/contrib/LaneNet/loss.py deleted file mode 100644 index 3c97dcb7de..0000000000 --- a/legacy/contrib/LaneNet/loss.py +++ /dev/null @@ -1,137 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as fluid -import numpy as np -from utils.config import cfg - - -def unsorted_segment_sum(data, segment_ids, unique_labels, feature_dims): - unique_labels_shape = fluid.layers.shape(unique_labels) - zeros = fluid.layers.fill_constant( - shape=[unique_labels_shape[0], feature_dims], dtype='float32', value=0) - segment_ids = fluid.layers.unsqueeze(segment_ids, axes=[1]) - segment_ids.stop_gradient = True - segment_sum = fluid.layers.scatter_nd_add(zeros, segment_ids, data) - zeros.stop_gradient = True - - return segment_sum - - -def norm(x, axis=-1): - distance = fluid.layers.reduce_sum( - fluid.layers.abs(x), dim=axis, keep_dim=True) - return distance - - -def discriminative_loss_single(prediction, correct_label, feature_dim, - label_shape, delta_v, delta_d, param_var, - param_dist, param_reg): - - correct_label = fluid.layers.reshape(correct_label, - [label_shape[1] * label_shape[0]]) - prediction = fluid.layers.transpose(prediction, [1, 2, 0]) - reshaped_pred = fluid.layers.reshape( - prediction, [label_shape[1] * label_shape[0], feature_dim]) - - unique_labels, unique_id, counts = fluid.layers.unique_with_counts( - correct_label) - correct_label.stop_gradient = True - counts = fluid.layers.cast(counts, 'float32') - num_instances = fluid.layers.shape(unique_labels) - - segmented_sum = unsorted_segment_sum( - reshaped_pred, unique_id, unique_labels, feature_dims=feature_dim) - - counts_rsp = fluid.layers.reshape(counts, (-1, 1)) - mu = fluid.layers.elementwise_div(segmented_sum, counts_rsp) - counts_rsp.stop_gradient = True - mu_expand = fluid.layers.gather(mu, unique_id) - tmp = fluid.layers.elementwise_sub(mu_expand, reshaped_pred) - - distance = norm(tmp) - distance = distance - delta_v - - distance_pos = fluid.layers.greater_equal(distance, - fluid.layers.zeros_like(distance)) - distance_pos = fluid.layers.cast(distance_pos, 'float32') - distance = distance * distance_pos - - distance = fluid.layers.square(distance) - - l_var = unsorted_segment_sum( - distance, unique_id, unique_labels, feature_dims=1) - l_var = fluid.layers.elementwise_div(l_var, counts_rsp) - l_var = fluid.layers.reduce_sum(l_var) - l_var = l_var / fluid.layers.cast(num_instances * (num_instances - 1), - 'float32') - - mu_interleaved_rep = fluid.layers.expand(mu, [num_instances, 1]) - mu_band_rep = fluid.layers.expand(mu, [1, num_instances]) - mu_band_rep = fluid.layers.reshape( - mu_band_rep, (num_instances * num_instances, feature_dim)) - - mu_diff = fluid.layers.elementwise_sub(mu_band_rep, mu_interleaved_rep) - - intermediate_tensor = fluid.layers.reduce_sum( - fluid.layers.abs(mu_diff), dim=1) - intermediate_tensor.stop_gradient = True - zero_vector = fluid.layers.zeros([1], 'float32') - bool_mask = fluid.layers.not_equal(intermediate_tensor, zero_vector) - temp = fluid.layers.where(bool_mask) - mu_diff_bool = fluid.layers.gather(mu_diff, temp) - - mu_norm = norm(mu_diff_bool) - mu_norm = 2. 
* delta_d - mu_norm - mu_norm_pos = fluid.layers.greater_equal(mu_norm, - fluid.layers.zeros_like(mu_norm)) - mu_norm_pos = fluid.layers.cast(mu_norm_pos, 'float32') - mu_norm = mu_norm * mu_norm_pos - mu_norm_pos.stop_gradient = True - - mu_norm = fluid.layers.square(mu_norm) - - l_dist = fluid.layers.reduce_mean(mu_norm) - - l_reg = fluid.layers.reduce_mean(norm(mu, axis=1)) - - l_var = param_var * l_var - l_dist = param_dist * l_dist - l_reg = param_reg * l_reg - loss = l_var + l_dist + l_reg - return loss, l_var, l_dist, l_reg - - -def discriminative_loss(prediction, correct_label, feature_dim, image_shape, - delta_v, delta_d, param_var, param_dist, param_reg): - batch_size = int(cfg.BATCH_SIZE_PER_DEV) - output_ta_loss = 0. - output_ta_var = 0. - output_ta_dist = 0. - output_ta_reg = 0. - for i in range(batch_size): - disc_loss_single, l_var_single, l_dist_single, l_reg_single = discriminative_loss_single( - prediction[i], correct_label[i], feature_dim, image_shape, delta_v, - delta_d, param_var, param_dist, param_reg) - output_ta_loss += disc_loss_single - output_ta_var += l_var_single - output_ta_dist += l_dist_single - output_ta_reg += l_reg_single - - disc_loss = output_ta_loss / batch_size - l_var = output_ta_var / batch_size - l_dist = output_ta_dist / batch_size - l_reg = output_ta_reg / batch_size - return disc_loss, l_var, l_dist, l_reg diff --git a/legacy/contrib/LaneNet/models/model_builder.py b/legacy/contrib/LaneNet/models/model_builder.py deleted file mode 100644 index 03548efbe7..0000000000 --- a/legacy/contrib/LaneNet/models/model_builder.py +++ /dev/null @@ -1,272 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -sys.path.append("..") -import struct - -import paddle.fluid as fluid -from paddle.fluid.proto.framework_pb2 import VarType - -from pdseg import solver -from utils.config import cfg -from pdseg.loss import multi_softmax_with_loss -from loss import discriminative_loss -from models.modeling import lanenet - - -class ModelPhase(object): - """ - Standard name for model phase in PaddleSeg - - The following standard keys are defined: - * `TRAIN`: training mode. - * `EVAL`: testing/evaluation mode. - * `PREDICT`: prediction/inference mode. 
- * `VISUAL` : visualization mode - """ - - TRAIN = 'train' - EVAL = 'eval' - PREDICT = 'predict' - VISUAL = 'visual' - - @staticmethod - def is_train(phase): - return phase == ModelPhase.TRAIN - - @staticmethod - def is_predict(phase): - return phase == ModelPhase.PREDICT - - @staticmethod - def is_eval(phase): - return phase == ModelPhase.EVAL - - @staticmethod - def is_visual(phase): - return phase == ModelPhase.VISUAL - - @staticmethod - def is_valid_phase(phase): - """ Check valid phase """ - if ModelPhase.is_train(phase) or ModelPhase.is_predict(phase) \ - or ModelPhase.is_eval(phase) or ModelPhase.is_visual(phase): - return True - - return False - - -def seg_model(image, class_num): - model_name = cfg.MODEL.MODEL_NAME - if model_name == 'lanenet': - logits = lanenet.lanenet(image, class_num) - else: - raise Exception( - "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet" - ) - return logits - - -def softmax(logit): - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.softmax(logit) - logit = fluid.layers.transpose(logit, [0, 3, 1, 2]) - return logit - - -def sigmoid_to_softmax(logit): - """ - one channel to two channel - """ - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit_back = 1 - logit - logit = fluid.layers.concat([logit_back, logit], axis=-1) - logit = fluid.layers.transpose(logit, [0, 3, 1, 2]) - return logit - - -def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): - if not ModelPhase.is_valid_phase(phase): - raise ValueError("ModelPhase {} is not valid!".format(phase)) - if ModelPhase.is_train(phase): - width = cfg.TRAIN_CROP_SIZE[0] - height = cfg.TRAIN_CROP_SIZE[1] - else: - width = cfg.EVAL_CROP_SIZE[0] - height = cfg.EVAL_CROP_SIZE[1] - - image_shape = [-1, cfg.DATASET.DATA_DIM, height, width] - grt_shape = [-1, 1, height, width] - class_num = cfg.DATASET.NUM_CLASSES - - with fluid.program_guard(main_prog, start_prog): - with fluid.unique_name.guard(): - image = fluid.data(name='image', shape=image_shape, dtype='float32') - label = fluid.data(name='label', shape=grt_shape, dtype='int32') - if cfg.MODEL.MODEL_NAME == 'lanenet': - label_instance = fluid.data( - name='label_instance', shape=grt_shape, dtype='int32') - mask = fluid.data(name='mask', shape=grt_shape, dtype='int32') - - # use DataLoader.from_generator when doing traning and evaluation - if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[image, label, label_instance, mask], - capacity=cfg.DATALOADER.BUF_SIZE, - iterable=False, - use_double_buffer=True) - - loss_type = cfg.SOLVER.LOSS - if not isinstance(loss_type, list): - loss_type = list(loss_type) - - logits = seg_model(image, class_num) - - if ModelPhase.is_train(phase): - loss_valid = False - valid_loss = [] - if cfg.MODEL.MODEL_NAME == 'lanenet': - embeding_logit = logits[1] - logits = logits[0] - disc_loss, _, _, l_reg = discriminative_loss( - embeding_logit, label_instance, 4, image_shape[2:], 0.5, - 3.0, 1.0, 1.0, 0.001) - - if "softmax_loss" in loss_type: - weight = None - if cfg.MODEL.MODEL_NAME == 'lanenet': - weight = get_dynamic_weight(label) - seg_loss = multi_softmax_with_loss(logits, label, mask, - class_num, weight) - loss_valid = True - valid_loss.append("softmax_loss") - - if not loss_valid: - raise Exception( - "SOLVER.LOSS: {} is set wrong. 
it should " - "include one of (softmax_loss, bce_loss, dice_loss) at least" - " example: ['softmax_loss']".format(cfg.SOLVER.LOSS)) - - invalid_loss = [x for x in loss_type if x not in valid_loss] - if len(invalid_loss) > 0: - print( - "Warning: the loss {} you set is invalid. it will not be included in loss computed." - .format(invalid_loss)) - - avg_loss = disc_loss + 0.00001 * l_reg + seg_loss - - #get pred result in original size - if isinstance(logits, tuple): - logit = logits[0] - else: - logit = logits - - if logit.shape[2:] != label.shape[2:]: - logit = fluid.layers.resize_bilinear(logit, label.shape[2:]) - - # return image input and logit output for inference graph prune - if ModelPhase.is_predict(phase): - if class_num == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = softmax(logit) - return image, logit - - if class_num == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - if ModelPhase.is_visual(phase): - if cfg.MODEL.MODEL_NAME == 'lanenet': - return pred, logits[1] - if class_num == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = softmax(logit) - return pred, logit - - accuracy, fp, fn = compute_metric(pred, label) - if ModelPhase.is_eval(phase): - return data_loader, pred, label, mask, accuracy, fp, fn - - if ModelPhase.is_train(phase): - optimizer = solver.Solver(main_prog, start_prog) - decayed_lr = optimizer.optimise(avg_loss) - return data_loader, avg_loss, decayed_lr, pred, label, mask, disc_loss, seg_loss, accuracy, fp, fn - - -def compute_metric(pred, label): - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - - idx = fluid.layers.where(pred == 1) - pix_cls_ret = fluid.layers.gather_nd(label, idx) - - correct_num = fluid.layers.reduce_sum( - fluid.layers.cast(pix_cls_ret, 'float32')) - - gt_num = fluid.layers.cast( - fluid.layers.shape( - fluid.layers.gather_nd(label, fluid.layers.where(label == 1)))[0], - 'int64') - pred_num = fluid.layers.cast( - fluid.layers.shape(fluid.layers.gather_nd(pred, idx))[0], 'int64') - accuracy = correct_num / gt_num - - false_pred = pred_num - correct_num - fp = fluid.layers.cast(false_pred, 'float32') / fluid.layers.cast( - fluid.layers.shape(pix_cls_ret)[0], 'int64') - - label_cls_ret = fluid.layers.gather_nd(label, - fluid.layers.where(label == 1)) - mis_pred = fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], - 'int64') - correct_num - fn = fluid.layers.cast(mis_pred, 'float32') / fluid.layers.cast( - fluid.layers.shape(label_cls_ret)[0], 'int64') - accuracy.stop_gradient = True - fp.stop_gradient = True - fn.stop_gradient = True - return accuracy, fp, fn - - -def get_dynamic_weight(label): - label = fluid.layers.reshape(label, [-1]) - unique_labels, unique_id, counts = fluid.layers.unique_with_counts(label) - counts = fluid.layers.cast(counts, 'float32') - weight = 1.0 / fluid.layers.log( - (counts / fluid.layers.reduce_sum(counts) + 1.02)) - return weight - - -def to_int(string, dest="I"): - return struct.unpack(dest, string)[0] - - -def parse_shape_from_file(filename): - with open(filename, "rb") as file: - version = file.read(4) - lod_level = to_int(file.read(8), dest="Q") - for i in range(lod_level): - _size = to_int(file.read(8), dest="Q") - _ = file.read(_size) - version = file.read(4) - tensor_desc_size = to_int(file.read(4)) - tensor_desc = VarType.TensorDesc() - 
tensor_desc.ParseFromString(file.read(tensor_desc_size)) - return tuple(tensor_desc.dims) diff --git a/legacy/contrib/LaneNet/models/modeling/__init__.py b/legacy/contrib/LaneNet/models/modeling/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/LaneNet/models/modeling/lanenet.py b/legacy/contrib/LaneNet/models/modeling/lanenet.py deleted file mode 100644 index 96de6f5fa5..0000000000 --- a/legacy/contrib/LaneNet/models/modeling/lanenet.py +++ /dev/null @@ -1,559 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid - -from utils.config import cfg -from pdseg.models.backbone.vgg import VGGNet as vgg_backbone -from pdseg.models.libs.model_libs import bn, relu -from pdseg.models.libs.model_libs import conv, max_pool, deconv -from pdseg.models.libs.model_libs import scope - -# from models.backbone.vgg import VGGNet as vgg_backbone - -# Bottleneck type -REGULAR = 1 -DOWNSAMPLING = 2 -UPSAMPLING = 3 -DILATED = 4 -ASYMMETRIC = 5 - - -def prelu(x, decoder=False): - # If decoder, then perform relu else perform prelu - if decoder: - return fluid.layers.relu(x) - return fluid.layers.prelu(x, 'channel') - - -def initial_block(inputs, name_scope='initial_block'): - ''' - The initial block for ENet has 2 branches: The convolution branch and MaxPool branch. - The conv branch has 13 filters, while the maxpool branch gives 3 channels corresponding to the RGB channels. - Both output layers are then concatenated to give an output of 16 channels. - - :param inputs(Tensor): A 4D tensor of shape [batch_size, height, width, channels] - :return net_concatenated(Tensor): a 4D Tensor of new shape [batch_size, height, width, channels] - ''' - # Convolutional branch - with scope(name_scope): - net_conv = conv(inputs, 13, 3, stride=2, padding=1) - net_conv = bn(net_conv) - net_conv = fluid.layers.prelu(net_conv, 'channel') - - # Max pool branch - net_pool = max_pool(inputs, [2, 2], stride=2, padding='SAME') - - # Concatenated output - does it matter max pool comes first or conv comes first? probably not. - net_concatenated = fluid.layers.concat([net_conv, net_pool], axis=1) - return net_concatenated - - -def bottleneck(inputs, - output_depth, - filter_size, - regularizer_prob, - projection_ratio=4, - type=REGULAR, - seed=0, - output_shape=None, - dilation_rate=None, - decoder=False, - name_scope='bottleneck'): - # Calculate the depth reduction based on the projection ratio used in 1x1 convolution. 
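The comment above refers to the 1×1 projection that shrinks the channel count by `projection_ratio` before the heavier spatial convolution and expands it back afterwards, which is what keeps the ENet bottleneck cheap. A minimal sketch of that channel arithmetic (the input channel count below is assumed, not taken from the patch):

```python
# Channel math behind the bottleneck's 1x1 projection, mirroring the deleted code.
inputs_channels = 64                  # hypothetical NCHW input: [N, 64, H, W]
projection_ratio = 4                  # default projection_ratio of bottleneck()
reduced_depth = int(inputs_channels / projection_ratio)
print(reduced_depth)                  # 16 channels carried through the 3x3 / 5x5 conv
```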
- reduced_depth = int(inputs.shape[1] / projection_ratio) - - # DOWNSAMPLING BOTTLENECK - if type == DOWNSAMPLING: - # =============MAIN BRANCH============= - # Just perform a max pooling - with scope('down_sample'): - inputs_shape = inputs.shape - with scope('main_max_pool'): - net_main = fluid.layers.conv2d( - inputs, - inputs_shape[1], - filter_size=3, - stride=2, - padding='SAME') - - # First get the difference in depth to pad, then pad with zeros only on the last dimension. - depth_to_pad = abs(inputs_shape[1] - output_depth) - paddings = [0, 0, 0, depth_to_pad, 0, 0, 0, 0] - with scope('main_padding'): - net_main = fluid.layers.pad(net_main, paddings=paddings) - - with scope('block1'): - net = conv( - inputs, reduced_depth, [2, 2], stride=2, padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - with scope('block2'): - net = conv( - net, - reduced_depth, [filter_size, filter_size], - padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - with scope('block3'): - net = conv(net, output_depth, [1, 1], padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - # Regularizer - net = fluid.layers.dropout(net, regularizer_prob, seed=seed) - - # Finally, combine the two branches together via an element-wise addition - net = fluid.layers.elementwise_add(net, net_main) - net = prelu(net, decoder=decoder) - - return net, inputs_shape - - # DILATION CONVOLUTION BOTTLENECK - # Everything is the same as a regular bottleneck except for the dilation rate argument - elif type == DILATED: - # Check if dilation rate is given - if not dilation_rate: - raise ValueError('Dilation rate is not given.') - - with scope('dilated'): - # Save the main branch for addition later - net_main = inputs - - # First projection with 1x1 kernel (dimensionality reduction) - with scope('block1'): - net = conv(inputs, reduced_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Second conv block --- apply dilated convolution here - with scope('block2'): - net = conv( - net, - reduced_depth, - filter_size, - padding='SAME', - dilation=dilation_rate) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Final projection with 1x1 kernel (Expansion) - with scope('block3'): - net = conv(net, output_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Regularizer - net = fluid.layers.dropout(net, regularizer_prob, seed=seed) - net = prelu(net, decoder=decoder) - - # Add the main branch - net = fluid.layers.elementwise_add(net_main, net) - net = prelu(net, decoder=decoder) - - return net - - # ASYMMETRIC CONVOLUTION BOTTLENECK - # Everything is the same as a regular bottleneck except for a [5,5] kernel decomposed into two [5,1] then [1,5] - elif type == ASYMMETRIC: - # Save the main branch for addition later - with scope('asymmetric'): - net_main = inputs - # First projection with 1x1 kernel (dimensionality reduction) - with scope('block1'): - net = conv(inputs, reduced_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Second conv block --- apply asymmetric conv here - with scope('block2'): - with scope('asymmetric_conv2a'): - net = conv( - net, reduced_depth, [filter_size, 1], padding='same') - with scope('asymmetric_conv2b'): - net = conv( - net, reduced_depth, [1, filter_size], padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - # Final projection with 1x1 kernel - with scope('block3'): - net = conv(net, output_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Regularizer - 
net = fluid.layers.dropout(net, regularizer_prob, seed=seed) - net = prelu(net, decoder=decoder) - - # Add the main branch - net = fluid.layers.elementwise_add(net_main, net) - net = prelu(net, decoder=decoder) - - return net - - # UPSAMPLING BOTTLENECK - # Everything is the same as a regular one, except convolution becomes transposed. - elif type == UPSAMPLING: - # Check if pooling indices is given - - # Check output_shape given or not - if output_shape is None: - raise ValueError('Output depth is not given') - - # =======MAIN BRANCH======= - # Main branch to upsample. output shape must match with the shape of the layer that was pooled initially, in order - # for the pooling indices to work correctly. However, the initial pooled layer was padded, so need to reduce dimension - # before unpooling. In the paper, padding is replaced with convolution for this purpose of reducing the depth! - with scope('upsampling'): - with scope('unpool'): - net_unpool = conv(inputs, output_depth, [1, 1]) - net_unpool = bn(net_unpool) - net_unpool = fluid.layers.resize_bilinear( - net_unpool, out_shape=output_shape[2:]) - - # First 1x1 projection to reduce depth - with scope('block1'): - net = conv(inputs, reduced_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - with scope('block2'): - net = deconv( - net, - reduced_depth, - filter_size=filter_size, - stride=2, - padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - # Final projection with 1x1 kernel - with scope('block3'): - net = conv(net, output_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Regularizer - net = fluid.layers.dropout(net, regularizer_prob, seed=seed) - net = prelu(net, decoder=decoder) - - # Finally, add the unpooling layer and the sub branch together - net = fluid.layers.elementwise_add(net, net_unpool) - net = prelu(net, decoder=decoder) - - return net - - # REGULAR BOTTLENECK - else: - with scope('regular'): - net_main = inputs - - # First projection with 1x1 kernel - with scope('block1'): - net = conv(inputs, reduced_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Second conv block - with scope('block2'): - net = conv( - net, - reduced_depth, [filter_size, filter_size], - padding='same') - net = bn(net) - net = prelu(net, decoder=decoder) - - # Final projection with 1x1 kernel - with scope('block3'): - net = conv(net, output_depth, [1, 1]) - net = bn(net) - net = prelu(net, decoder=decoder) - - # Regularizer - net = fluid.layers.dropout(net, regularizer_prob, seed=seed) - net = prelu(net, decoder=decoder) - - # Add the main branch - net = fluid.layers.elementwise_add(net_main, net) - net = prelu(net, decoder=decoder) - - return net - - -def ENet_stage1(inputs, name_scope='stage1_block'): - with scope(name_scope): - with scope('bottleneck1_0'): - net, inputs_shape_1 \ - = bottleneck(inputs, output_depth=64, filter_size=3, regularizer_prob=0.01, type=DOWNSAMPLING, - name_scope='bottleneck1_0') - with scope('bottleneck1_1'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.01, - name_scope='bottleneck1_1') - with scope('bottleneck1_2'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.01, - name_scope='bottleneck1_2') - with scope('bottleneck1_3'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.01, - name_scope='bottleneck1_3') - with scope('bottleneck1_4'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.01, - 
name_scope='bottleneck1_4') - return net, inputs_shape_1 - - -def ENet_stage2(inputs, name_scope='stage2_block'): - with scope(name_scope): - net, inputs_shape_2 \ - = bottleneck(inputs, output_depth=128, filter_size=3, regularizer_prob=0.1, type=DOWNSAMPLING, - name_scope='bottleneck2_0') - for i in range(2): - with scope('bottleneck2_{}'.format(str(4 * i + 1))): - net = bottleneck( - net, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - name_scope='bottleneck2_{}'.format(str(4 * i + 1))) - with scope('bottleneck2_{}'.format(str(4 * i + 2))): - net = bottleneck( - net, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - type=DILATED, - dilation_rate=(2**(2 * i + 1)), - name_scope='bottleneck2_{}'.format(str(4 * i + 2))) - with scope('bottleneck2_{}'.format(str(4 * i + 3))): - net = bottleneck( - net, - output_depth=128, - filter_size=5, - regularizer_prob=0.1, - type=ASYMMETRIC, - name_scope='bottleneck2_{}'.format(str(4 * i + 3))) - with scope('bottleneck2_{}'.format(str(4 * i + 4))): - net = bottleneck( - net, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - type=DILATED, - dilation_rate=(2**(2 * i + 2)), - name_scope='bottleneck2_{}'.format(str(4 * i + 4))) - return net, inputs_shape_2 - - -def ENet_stage3(inputs, name_scope='stage3_block'): - with scope(name_scope): - for i in range(2): - with scope('bottleneck3_{}'.format(str(4 * i + 0))): - net = bottleneck( - inputs, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - name_scope='bottleneck3_{}'.format(str(4 * i + 0))) - with scope('bottleneck3_{}'.format(str(4 * i + 1))): - net = bottleneck( - net, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - type=DILATED, - dilation_rate=(2**(2 * i + 1)), - name_scope='bottleneck3_{}'.format(str(4 * i + 1))) - with scope('bottleneck3_{}'.format(str(4 * i + 2))): - net = bottleneck( - net, - output_depth=128, - filter_size=5, - regularizer_prob=0.1, - type=ASYMMETRIC, - name_scope='bottleneck3_{}'.format(str(4 * i + 2))) - with scope('bottleneck3_{}'.format(str(4 * i + 3))): - net = bottleneck( - net, - output_depth=128, - filter_size=3, - regularizer_prob=0.1, - type=DILATED, - dilation_rate=(2**(2 * i + 2)), - name_scope='bottleneck3_{}'.format(str(4 * i + 3))) - return net - - -def ENet_stage4(inputs, - inputs_shape, - connect_tensor, - skip_connections=True, - name_scope='stage4_block'): - with scope(name_scope): - with scope('bottleneck4_0'): - net = bottleneck( - inputs, - output_depth=64, - filter_size=3, - regularizer_prob=0.1, - type=UPSAMPLING, - decoder=True, - output_shape=inputs_shape, - name_scope='bottleneck4_0') - - if skip_connections: - net = fluid.layers.elementwise_add(net, connect_tensor) - with scope('bottleneck4_1'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.1, - decoder=True, - name_scope='bottleneck4_1') - with scope('bottleneck4_2'): - net = bottleneck( - net, - output_depth=64, - filter_size=3, - regularizer_prob=0.1, - decoder=True, - name_scope='bottleneck4_2') - - return net - - -def ENet_stage5(inputs, - inputs_shape, - connect_tensor, - skip_connections=True, - name_scope='stage5_block'): - with scope(name_scope): - net = bottleneck( - inputs, - output_depth=16, - filter_size=3, - regularizer_prob=0.1, - type=UPSAMPLING, - decoder=True, - output_shape=inputs_shape, - name_scope='bottleneck5_0') - - if skip_connections: - net = fluid.layers.elementwise_add(net, connect_tensor) - with scope('bottleneck5_1'): - net = bottleneck( - net, - output_depth=16, - 
filter_size=3, - regularizer_prob=0.1, - decoder=True, - name_scope='bottleneck5_1') - return net - - -def decoder(input, num_classes): - if 'enet' in cfg.MODEL.LANENET.BACKBONE: - # Segmentation branch - with scope('LaneNetSeg'): - initial, stage1, stage2, inputs_shape_1, inputs_shape_2 = input - segStage3 = ENet_stage3(stage2) - segStage4 = ENet_stage4(segStage3, inputs_shape_2, stage1) - segStage5 = ENet_stage5(segStage4, inputs_shape_1, initial) - segLogits = deconv( - segStage5, num_classes, filter_size=2, stride=2, padding='SAME') - - # Embedding branch - with scope('LaneNetEm'): - emStage3 = ENet_stage3(stage2) - emStage4 = ENet_stage4(emStage3, inputs_shape_2, stage1) - emStage5 = ENet_stage5(emStage4, inputs_shape_1, initial) - emLogits = deconv( - emStage5, 4, filter_size=2, stride=2, padding='SAME') - - elif 'vgg' in cfg.MODEL.LANENET.BACKBONE: - encoder_list = ['pool5', 'pool4', 'pool3'] - # score stage - input_tensor = input[encoder_list[0]] - with scope('score_origin'): - score = conv(input_tensor, 64, 1) - encoder_list = encoder_list[1:] - for i in range(len(encoder_list)): - with scope('deconv_{:d}'.format(i + 1)): - deconv_out = deconv( - score, 64, filter_size=4, stride=2, padding='SAME') - input_tensor = input[encoder_list[i]] - with scope('score_{:d}'.format(i + 1)): - score = conv(input_tensor, 64, 1) - score = fluid.layers.elementwise_add(deconv_out, score) - - with scope('deconv_final'): - emLogits = deconv( - score, 64, filter_size=16, stride=8, padding='SAME') - with scope('score_final'): - segLogits = conv(emLogits, num_classes, 1) - emLogits = relu(conv(emLogits, 4, 1)) - return segLogits, emLogits - - -def encoder(input): - if 'vgg' in cfg.MODEL.LANENET.BACKBONE: - model = vgg_backbone(layers=16) - # output = model.net(input) - - _, encode_feature_dict = model.net( - input, end_points=13, decode_points=[7, 10, 13]) - output = {} - output['pool3'] = encode_feature_dict[7] - output['pool4'] = encode_feature_dict[10] - output['pool5'] = encode_feature_dict[13] - elif 'enet' in cfg.MODEL.LANENET.BACKBONE: - with scope('LaneNetBase'): - initial = initial_block(input) - stage1, inputs_shape_1 = ENet_stage1(initial) - stage2, inputs_shape_2 = ENet_stage2(stage1) - output = (initial, stage1, stage2, inputs_shape_1, inputs_shape_2) - else: - raise Exception( - "LaneNet expect enet and vgg backbone, but received {}".format( - cfg.MODEL.LANENET.BACKBONE)) - return output - - -def lanenet(img, num_classes): - output = encoder(img) - segLogits, emLogits = decoder(output, num_classes) - - return segLogits, emLogits diff --git a/legacy/contrib/LaneNet/reader.py b/legacy/contrib/LaneNet/reader.py deleted file mode 100644 index b051c71a1a..0000000000 --- a/legacy/contrib/LaneNet/reader.py +++ /dev/null @@ -1,329 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
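Both backbones above end in the same two-head contract: a segmentation head with `num_classes` channels and a 4-channel per-pixel embedding that is later clustered into individual lanes. A shape-level sketch of that contract, with plain NumPy standing in for the deleted paddle.fluid graph and assumed tensor sizes:

```python
import numpy as np

# Shape-level sketch of lanenet()'s two outputs; sizes below are assumed.
batch, num_classes, height, width = 1, 2, 256, 512

seg_logits = np.zeros((batch, num_classes, height, width), np.float32)  # lane / background scores
emb_logits = np.zeros((batch, 4, height, width), np.float32)            # 4-D embedding per pixel

# The embedding head is always 4 channels in the deleted decoder
# (deconv(emStage5, 4, ...) for ENet, conv(emLogits, 4, 1) for VGG),
# matching the embedding dimension passed to discriminative_loss.
assert seg_logits.shape[1] == num_classes
assert emb_logits.shape[1] == 4
```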
- -from __future__ import print_function -import sys -import os -import time -import codecs - -import numpy as np -import cv2 - -from utils.config import cfg -import data_aug as aug -from pdseg.data_utils import GeneratorEnqueuer -from models.model_builder import ModelPhase -import copy - - -def cv2_imread(file_path, flag=cv2.IMREAD_COLOR): - # resolve cv2.imread open Chinese file path issues on Windows Platform. - return cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), flag) - - -class LaneNetDataset(): - def __init__(self, - file_list, - data_dir, - shuffle=False, - mode=ModelPhase.TRAIN): - self.mode = mode - self.shuffle = shuffle - self.data_dir = data_dir - - self.shuffle_seed = 0 - # NOTE: Please ensure file list was save in UTF-8 coding format - with codecs.open(file_list, 'r', 'utf-8') as flist: - self.lines = [line.strip() for line in flist] - self.all_lines = copy.deepcopy(self.lines) - if shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - elif shuffle: - np.random.shuffle(self.lines) - - def generator(self): - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // cfg.NUM_TRAINERS - self.lines = self.all_lines[num_lines * cfg.TRAINER_ID:num_lines * - (cfg.TRAINER_ID + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - for line in self.lines: - yield self.process_image(line, self.data_dir, self.mode) - - def sharding_generator(self, pid=0, num_processes=1): - """ - Use line id as shard key for multiprocess io - It's a normal generator if pid=0, num_processes=1 - """ - for index, line in enumerate(self.lines): - # Use index and pid to shard file list - if index % num_processes == pid: - yield self.process_image(line, self.data_dir, self.mode) - - def batch_reader(self, batch_size): - br = self.batch(self.reader, batch_size) - for batch in br: - yield batch[0], batch[1], batch[2] - - def multiprocess_generator(self, max_queue_size=32, num_processes=8): - # Re-shuffle file list - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // self.num_trainers - self.lines = self.all_lines[num_lines * self.trainer_id:num_lines * - (self.trainer_id + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - # Create multiple sharding generators according to num_processes for multiple processes - generators = [] - for pid in range(num_processes): - generators.append(self.sharding_generator(pid, num_processes)) - - try: - enqueuer = GeneratorEnqueuer(generators) - enqueuer.start(max_queue_size=max_queue_size, workers=num_processes) - while True: - generator_out = None - while enqueuer.is_running(): - if not enqueuer.queue.empty(): - generator_out = enqueuer.queue.get(timeout=5) - break - else: - time.sleep(0.01) - if generator_out is None: - break - yield generator_out - finally: - if enqueuer is not None: - enqueuer.stop() - - def batch(self, reader, batch_size, is_test=False, drop_last=False): - def batch_reader(is_test=False, drop_last=drop_last): - if is_test: - imgs, grts, grts_instance, img_names, valid_shapes, org_shapes = [], [], [], [], [], [] - for img, grt, grt_instance, img_name, valid_shape, org_shape in reader( - ): - imgs.append(img) - grts.append(grt) - grts_instance.append(grt_instance) - img_names.append(img_name) - valid_shapes.append(valid_shape) - org_shapes.append(org_shape) - 
if len(imgs) == batch_size: - yield np.array(imgs), np.array(grts), np.array( - grts_instance), img_names, np.array( - valid_shapes), np.array(org_shapes) - imgs, grts, grts_instance, img_names, valid_shapes, org_shapes = [], [], [], [], [], [] - - if not drop_last and len(imgs) > 0: - yield np.array(imgs), np.array(grts), np.array( - grts_instance), img_names, np.array( - valid_shapes), np.array(org_shapes) - else: - imgs, labs, labs_instance, ignore = [], [], [], [] - bs = 0 - for img, lab, lab_instance, ig in reader(): - imgs.append(img) - labs.append(lab) - labs_instance.append(lab_instance) - ignore.append(ig) - bs += 1 - if bs == batch_size: - yield np.array(imgs), np.array(labs), np.array( - labs_instance), np.array(ignore) - bs = 0 - imgs, labs, labs_instance, ignore = [], [], [], [] - - if not drop_last and bs > 0: - yield np.array(imgs), np.array(labs), np.array( - labs_instance), np.array(ignore) - - return batch_reader(is_test, drop_last) - - def load_image(self, line, src_dir, mode=ModelPhase.TRAIN): - # original image cv2.imread flag setting - cv2_imread_flag = cv2.IMREAD_COLOR - if cfg.DATASET.IMAGE_TYPE == "rgba": - # If use RBGA 4 channel ImageType, use IMREAD_UNCHANGED flags to - # reserver alpha channel - cv2_imread_flag = cv2.IMREAD_UNCHANGED - - parts = line.strip().split(cfg.DATASET.SEPARATOR) - if len(parts) != 3: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception("File list format incorrect! It should be" - " image_name{}label_name\\n".format( - cfg.DATASET.SEPARATOR)) - img_name, grt_name, grt_instance_name = parts[0], None, None - else: - img_name, grt_name, grt_instance_name = parts[0], parts[1], parts[2] - - img_path = os.path.join(src_dir, img_name) - img = cv2_imread(img_path, cv2_imread_flag) - - if grt_name is not None: - grt_path = os.path.join(src_dir, grt_name) - grt_instance_path = os.path.join(src_dir, grt_instance_name) - grt = cv2_imread(grt_path, cv2.IMREAD_GRAYSCALE) - grt[grt == 255] = 1 - grt[grt != 1] = 0 - grt_instance = cv2_imread(grt_instance_path, cv2.IMREAD_GRAYSCALE) - else: - grt = None - grt_instance = None - - if img is None: - raise Exception( - "Empty image, src_dir: {}, img: {} & lab: {}".format( - src_dir, img_path, grt_path)) - - img_height = img.shape[0] - img_width = img.shape[1] - - if grt is not None: - grt_height = grt.shape[0] - grt_width = grt.shape[1] - - if img_height != grt_height or img_width != grt_width: - raise Exception( - "source img and label img must has the same size") - else: - if mode == ModelPhase.TRAIN or mode == ModelPhase.EVAL: - raise Exception( - "Empty image, src_dir: {}, img: {} & lab: {}".format( - src_dir, img_path, grt_path)) - - if len(img.shape) < 3: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - - img_channels = img.shape[2] - if img_channels < 3: - raise Exception("PaddleSeg only supports gray, rgb or rgba image") - if img_channels != cfg.DATASET.DATA_DIM: - raise Exception( - "Input image channel({}) is not match cfg.DATASET.DATA_DIM({}), img_name={}" - .format(img_channels, cfg.DATASET.DATADIM, img_name)) - if img_channels != len(cfg.MEAN): - raise Exception( - "img name {}, img chns {} mean size {}, size unequal".format( - img_name, img_channels, len(cfg.MEAN))) - if img_channels != len(cfg.STD): - raise Exception( - "img name {}, img chns {} std size {}, size unequal".format( - img_name, img_channels, len(cfg.STD))) - - return img, grt, grt_instance, img_name, grt_name - - def normalize_image(self, img): - """ 像素归一化后减均值除方差 """ - img = img.transpose((2, 0, 
1)).astype('float32') / 255.0 - img_mean = np.array(cfg.MEAN).reshape((len(cfg.MEAN), 1, 1)) - img_std = np.array(cfg.STD).reshape((len(cfg.STD), 1, 1)) - img -= img_mean - img /= img_std - - return img - - def process_image(self, line, data_dir, mode): - """ process_image """ - img, grt, grt_instance, img_name, grt_name = self.load_image( - line, data_dir, mode=mode) - if mode == ModelPhase.TRAIN: - img, grt, grt_instance = aug.resize(img, grt, grt_instance, mode) - if cfg.AUG.RICH_CROP.ENABLE: - if cfg.AUG.RICH_CROP.BLUR: - if cfg.AUG.RICH_CROP.BLUR_RATIO <= 0: - n = 0 - elif cfg.AUG.RICH_CROP.BLUR_RATIO >= 1: - n = 1 - else: - n = int(1.0 / cfg.AUG.RICH_CROP.BLUR_RATIO) - if n > 0: - if np.random.randint(0, n) == 0: - radius = np.random.randint(3, 10) - if radius % 2 != 1: - radius = radius + 1 - if radius > 9: - radius = 9 - img = cv2.GaussianBlur(img, (radius, radius), 0, 0) - - img, grt = aug.random_rotation( - img, - grt, - rich_crop_max_rotation=cfg.AUG.RICH_CROP.MAX_ROTATION, - mean_value=cfg.DATASET.PADDING_VALUE) - - img, grt = aug.rand_scale_aspect( - img, - grt, - rich_crop_min_scale=cfg.AUG.RICH_CROP.MIN_AREA_RATIO, - rich_crop_aspect_ratio=cfg.AUG.RICH_CROP.ASPECT_RATIO) - img = aug.hsv_color_jitter( - img, - brightness_jitter_ratio=cfg.AUG.RICH_CROP. - BRIGHTNESS_JITTER_RATIO, - saturation_jitter_ratio=cfg.AUG.RICH_CROP. - SATURATION_JITTER_RATIO, - contrast_jitter_ratio=cfg.AUG.RICH_CROP. - CONTRAST_JITTER_RATIO) - - if cfg.AUG.FLIP: - if cfg.AUG.FLIP_RATIO <= 0: - n = 0 - elif cfg.AUG.FLIP_RATIO >= 1: - n = 1 - else: - n = int(1.0 / cfg.AUG.FLIP_RATIO) - if n > 0: - if np.random.randint(0, n) == 0: - img = img[::-1, :, :] - grt = grt[::-1, :] - - if cfg.AUG.MIRROR: - if np.random.randint(0, 2) == 1: - img = img[:, ::-1, :] - grt = grt[:, ::-1] - - img, grt = aug.rand_crop(img, grt, mode=mode) - elif ModelPhase.is_eval(mode): - img, grt, grt_instance = aug.resize( - img, grt, grt_instance, mode=mode) - elif ModelPhase.is_visual(mode): - ori_img = img.copy() - img, grt, grt_instance = aug.resize( - img, grt, grt_instance, mode=mode) - valid_shape = [img.shape[0], img.shape[1]] - else: - raise ValueError("Dataset mode={} Error!".format(mode)) - - # Normalize image - img = self.normalize_image(img) - - if ModelPhase.is_train(mode) or ModelPhase.is_eval(mode): - grt = np.expand_dims(np.array(grt).astype('int32'), axis=0) - ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32') - if ModelPhase.is_train(mode): - return (img, grt, grt_instance, ignore) - elif ModelPhase.is_eval(mode): - return (img, grt, grt_instance, ignore) - elif ModelPhase.is_visual(mode): - return (img, grt, grt_instance, img_name, valid_shape, ori_img) diff --git a/legacy/contrib/LaneNet/requirements.txt b/legacy/contrib/LaneNet/requirements.txt deleted file mode 100644 index 5246f26502..0000000000 --- a/legacy/contrib/LaneNet/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -pre-commit -yapf == 0.26.0 -flake8 -pyyaml >= 5.1 -Pillow -numpy -six -opencv-python -tqdm -requests -sklearn -visualdl == 2.0.0b1 diff --git a/legacy/contrib/LaneNet/train.py b/legacy/contrib/LaneNet/train.py deleted file mode 100644 index 0fb251c3cf..0000000000 --- a/legacy/contrib/LaneNet/train.py +++ /dev/null @@ -1,392 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
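The augmentation ratios in `process_image()` above (`BLUR_RATIO`, `FLIP_RATIO`) are not compared against a uniform draw; each ratio is turned into an integer trigger, `n = int(1 / ratio)`, and the augmentation fires when `randint(0, n) == 0`, i.e. with probability roughly equal to the configured ratio. A small sketch of that pattern (the sample size is assumed):

```python
import numpy as np

# The deleted process_image() converts a ratio into an integer trigger:
# the event fires when randint(0, n) == 0, i.e. with probability ~1/n ~ ratio.
ratio = 0.1
n = int(1.0 / ratio)                                  # 10
trials = 100000                                       # assumed sample size
hits = sum(int(np.random.randint(0, n) == 0) for _ in range(trials))
print(hits / trials)                                  # close to 0.1
```

Because `int(1.0 / ratio)` rounds down, the effective probability only approximates the configured ratio when it does not divide 1 evenly.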
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(os.path.split(cur_path)[0])[0] -SEG_PATH = os.path.join(cur_path, "../../../") -sys.path.append(SEG_PATH) -sys.path.append(root_path) - -import argparse -import pprint - -import numpy as np -import paddle.fluid as fluid - -from utils.config import cfg -from pdseg.utils.timer import Timer, calculate_eta -from reader import LaneNetDataset -from models.model_builder import build_model -from models.model_builder import ModelPhase -from eval import evaluate -from vis import visualize -from utils import dist_utils -from utils.load_model_utils import load_pretrained_weights - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddleSeg training') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess I/O or not', - action='store_true', - default=False) - parser.add_argument( - '--log_steps', - dest='log_steps', - help='Display logging information at every log_steps', - default=10, - type=int) - parser.add_argument( - '--debug', - dest='debug', - help='debug mode, display detail information of training', - action='store_true') - parser.add_argument( - '--use_vdl', - dest='use_vdl', - help='whether to record the data during training to VisualDL', - action='store_true') - parser.add_argument( - '--vdl_log_dir', - dest='vdl_log_dir', - help='VisualDL logging directory', - default=None, - type=str) - parser.add_argument( - '--do_eval', - dest='do_eval', - help='Evaluation models result on every new checkpoint', - action='store_true') - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - return parser.parse_args() - - -def save_checkpoint(exe, program, ckpt_name): - """ - Save checkpoint for evaluation or resume training - """ - ckpt_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, str(ckpt_name)) - print("Save model checkpoint to {}".format(ckpt_dir)) - if not os.path.isdir(ckpt_dir): - os.makedirs(ckpt_dir) - - fluid.save(program, os.path.join(ckpt_dir, 'model')) - - return ckpt_dir - - -def load_checkpoint(exe, program): - """ - Load checkpoiont for resuming training - """ - model_path = cfg.TRAIN.RESUME_MODEL_DIR - print('Resume model training from:', model_path) - if not os.path.exists(model_path): - raise ValueError( - "TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path)) - fluid.load(program, os.path.join(model_path, 'model'), exe) - - # Check is path ended by path spearator - if model_path[-1] == os.sep: - model_path = model_path[0:-1] - epoch_name = os.path.basename(model_path) - # If resume 
model is final model - if epoch_name == 'final': - begin_epoch = cfg.SOLVER.NUM_EPOCHS - # If resume model path is end of digit, restore epoch status - elif epoch_name.isdigit(): - epoch = int(epoch_name) - begin_epoch = epoch + 1 - else: - raise ValueError("Resume model path is not valid!") - print("Model checkpoint loaded successfully!") - return begin_epoch - - -def print_info(*msg): - if cfg.TRAINER_ID == 0: - print(*msg) - - -def train(cfg): - startup_prog = fluid.Program() - train_prog = fluid.Program() - drop_last = True - - dataset = LaneNetDataset( - file_list=cfg.DATASET.TRAIN_FILE_LIST, - mode=ModelPhase.TRAIN, - shuffle=True, - data_dir=cfg.DATASET.DATA_DIR) - - def data_generator(): - if args.use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - else: - data_gen = dataset.generator() - - batch_data = [] - for b in data_gen: - batch_data.append(b) - if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): - for item in batch_data: - yield item - batch_data = [] - - # Get device environment - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() - places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() - - # Get number of GPU - dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) - print_info("#Device count: {}".format(dev_count)) - - # Make sure BATCH_SIZE can divided by GPU cards - assert cfg.BATCH_SIZE % dev_count == 0, ( - 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( - cfg.BATCH_SIZE, dev_count)) - # If use multi-gpu training mode, batch data will allocated to each GPU evenly - batch_size_per_dev = cfg.BATCH_SIZE // dev_count - cfg.BATCH_SIZE_PER_DEV = batch_size_per_dev - print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) - - data_loader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model( - train_prog, startup_prog, phase=ModelPhase.TRAIN) - data_loader.set_sample_generator( - data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) - - exe = fluid.Executor(place) - exe.run(startup_prog) - - exec_strategy = fluid.ExecutionStrategy() - # Clear temporary variables every 100 iteration - if args.use_gpu: - exec_strategy.num_threads = fluid.core.get_cuda_device_count() - exec_strategy.num_iteration_per_drop_scope = 100 - build_strategy = fluid.BuildStrategy() - - if cfg.NUM_TRAINERS > 1 and args.use_gpu: - dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) - exec_strategy.num_threads = 1 - - if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: - if dev_count > 1: - # Apply sync batch norm strategy - print_info("Sync BatchNorm strategy is effective.") - build_strategy.sync_batch_norm = True - else: - print_info( - "Sync BatchNorm strategy will not be effective if GPU device" - " count <= 1") - compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( - loss_name=avg_loss.name, - exec_strategy=exec_strategy, - build_strategy=build_strategy) - - # Resume training - begin_epoch = cfg.SOLVER.BEGIN_EPOCH - if cfg.TRAIN.RESUME_MODEL_DIR: - begin_epoch = load_checkpoint(exe, train_prog) - # Load pretrained model - elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): - load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR) - else: - print_info( - 'Pretrained model dir {} not exists, training from scratch...'. 
- format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) - - # fetch_list = [avg_loss.name, lr.name, accuracy.name, precision.name, recall.name] - fetch_list = [ - avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name, - fp.name, fn.name - ] - if args.debug: - # Fetch more variable info and use streaming confusion matrix to - # calculate IoU results if in debug mode - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - fetch_list.extend([pred.name, grts.name, masks.name]) - # cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) - - if args.use_vdl: - if not args.vdl_log_dir: - print_info("Please specify the log directory by --vdl_log_dir.") - exit(1) - - from visualdl import LogWriter - log_writer = LogWriter(args.vdl_log_dir) - - # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - step = 0 - all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE - if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: - all_step += 1 - all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) - - avg_loss = 0.0 - avg_seg_loss = 0.0 - avg_emb_loss = 0.0 - avg_acc = 0.0 - avg_fp = 0.0 - avg_fn = 0.0 - timer = Timer() - timer.start() - if begin_epoch > cfg.SOLVER.NUM_EPOCHS: - raise ValueError( - ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( - begin_epoch, cfg.SOLVER.NUM_EPOCHS)) - - if args.use_mpio: - print_info("Use multiprocess reader") - else: - print_info("Use multi-thread reader") - - for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - data_loader.start() - while True: - try: - # If not in debug mode, avoid unnessary log and calculate - loss, lr, out_seg_loss, out_emb_loss, out_acc, out_fp, out_fn = exe.run( - program=compiled_train_prog, - fetch_list=fetch_list, - return_numpy=True) - - avg_loss += np.mean(np.array(loss)) - avg_seg_loss += np.mean(np.array(out_seg_loss)) - avg_emb_loss += np.mean(np.array(out_emb_loss)) - avg_acc += np.mean(out_acc) - avg_fp += np.mean(out_fp) - avg_fn += np.mean(out_fn) - step += 1 - - if step % args.log_steps == 0 and cfg.TRAINER_ID == 0: - avg_loss /= args.log_steps - avg_seg_loss /= args.log_steps - avg_emb_loss /= args.log_steps - avg_acc /= args.log_steps - avg_fp /= args.log_steps - avg_fn /= args.log_steps - speed = args.log_steps / timer.elapsed_time() - print(( - "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}" - ).format(epoch, step, lr[0], avg_loss, avg_seg_loss, - avg_emb_loss, avg_acc, avg_fp, avg_fn, speed, - calculate_eta(all_step - step, speed))) - if args.use_vdl: - log_writer.add_scalar('Train/loss', avg_loss, step) - log_writer.add_scalar('Train/lr', lr[0], step) - log_writer.add_scalar('Train/speed', speed, step) - sys.stdout.flush() - avg_loss = 0.0 - avg_seg_loss = 0.0 - avg_emb_loss = 0.0 - avg_acc = 0.0 - avg_fp = 0.0 - avg_fn = 0.0 - timer.restart() - - except fluid.core.EOFException: - data_loader.reset() - break - except Exception as e: - print(e) - - if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0: - ckpt_dir = save_checkpoint(exe, train_prog, epoch) - - if args.do_eval: - print("Evaluation start") - accuracy, fp, fn = evaluate( - cfg=cfg, - ckpt_dir=ckpt_dir, - use_gpu=args.use_gpu, - use_mpio=args.use_mpio) - if args.use_vdl: - log_writer.add_scalar('Evaluate/accuracy', accuracy, step) - log_writer.add_scalar('Evaluate/fp', fp, step) - log_writer.add_scalar('Evaluate/fn', fn, 
step) - - # Use VisualDL to visualize results - if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None: - visualize( - cfg=cfg, - use_gpu=args.use_gpu, - vis_file_list=cfg.DATASET.VIS_FILE_LIST, - vis_dir="visual", - ckpt_dir=ckpt_dir, - log_writer=log_writer) - - # save final model - if cfg.TRAINER_ID == 0: - save_checkpoint(exe, train_prog, 'final') - - -def main(args): - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - - cfg.TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", 0)) - cfg.NUM_TRAINERS = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - - cfg.check_and_infer() - print_info(pprint.pformat(cfg)) - train(cfg) - - -if __name__ == '__main__': - args = parse_args() - if fluid.core.is_compiled_with_cuda() != True and args.use_gpu == True: - print( - "You can not set use_gpu = True in the model because you are using paddlepaddle-cpu." - ) - print( - "Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu=False to run models on CPU." - ) - sys.exit(1) - main(args) diff --git a/legacy/contrib/LaneNet/utils/__init__.py b/legacy/contrib/LaneNet/utils/__init__.py deleted file mode 100644 index 0e7c9f3954..0000000000 --- a/legacy/contrib/LaneNet/utils/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/LaneNet/utils/config.py b/legacy/contrib/LaneNet/utils/config.py deleted file mode 100644 index 4915b7b78e..0000000000 --- a/legacy/contrib/LaneNet/utils/config.py +++ /dev/null @@ -1,233 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
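The resume path in the deleted `load_checkpoint()` recovers the epoch to continue from out of the checkpoint directory name: `final` maps to `SOLVER.NUM_EPOCHS`, a numeric name resumes at that epoch plus one, and anything else is rejected. A standalone sketch of that rule (the paths and epoch count below are hypothetical):

```python
import os

# Sketch of the resume rule from the deleted load_checkpoint().
def resume_epoch(model_path, num_epochs=30):
    if model_path.endswith(os.sep):
        model_path = model_path[:-1]
    name = os.path.basename(model_path)
    if name == 'final':
        return num_epochs                 # resuming from the final snapshot
    if name.isdigit():
        return int(name) + 1              # e.g. ".../12" resumes at epoch 13
    raise ValueError("Resume model path is not valid!")

print(resume_epoch("saved_model/lanenet/12"))      # 13
print(resume_epoch("saved_model/lanenet/final"))   # 30
```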
- -from __future__ import print_function -from __future__ import unicode_literals - -import os -import sys - -# LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -# PDSEG_PATH = os.path.join(LOCAL_PATH, "../../../", "pdseg") -# print(PDSEG_PATH) -# sys.path.insert(0, PDSEG_PATH) -# print(sys.path) - -from pdseg.utils.collect import SegConfig -import numpy as np - -cfg = SegConfig() - -########################## 基本配置 ########################################### -# 均值,图像预处理减去的均值 -cfg.MEAN = [0.5, 0.5, 0.5] -# 标准差,图像预处理除以标准差· -cfg.STD = [0.5, 0.5, 0.5] -# 批处理大小 -cfg.BATCH_SIZE = 1 -# 验证时图像裁剪尺寸(宽,高) -cfg.EVAL_CROP_SIZE = tuple() -# 训练时图像裁剪尺寸(宽,高) -cfg.TRAIN_CROP_SIZE = tuple() -# 多进程训练总进程数 -cfg.NUM_TRAINERS = 1 -# 多进程训练进程ID -cfg.TRAINER_ID = 0 -# 每张gpu上的批大小,无需设置,程序会自动根据batch调整 -cfg.BATCH_SIZE_PER_DEV = 1 -########################## 数据载入配置 ####################################### -# 数据载入时的并发数, 建议值8 -cfg.DATALOADER.NUM_WORKERS = 8 -# 数据载入时缓存队列大小, 建议值256 -cfg.DATALOADER.BUF_SIZE = 256 - -########################## 数据集配置 ######################################### -# 数据主目录目录 -cfg.DATASET.DATA_DIR = './dataset/cityscapes/' -# 训练集列表 -cfg.DATASET.TRAIN_FILE_LIST = './dataset/cityscapes/train.list' -# 训练集数量 -cfg.DATASET.TRAIN_TOTAL_IMAGES = 2975 -# 验证集列表 -cfg.DATASET.VAL_FILE_LIST = './dataset/cityscapes/val.list' -# 验证数据数量 -cfg.DATASET.VAL_TOTAL_IMAGES = 500 -# 测试数据列表 -cfg.DATASET.TEST_FILE_LIST = './dataset/cityscapes/test.list' -# 测试数据数量 -cfg.DATASET.TEST_TOTAL_IMAGES = 500 -# VisualDL 可视化的数据集 -cfg.DATASET.VIS_FILE_LIST = None -# 类别数(需包括背景类) -cfg.DATASET.NUM_CLASSES = 19 -# 输入图像类型, 支持三通道'rgb',四通道'rgba',单通道灰度图'gray' -cfg.DATASET.IMAGE_TYPE = 'rgb' -# 输入图片的通道数 -cfg.DATASET.DATA_DIM = 3 -# 数据列表分割符, 默认为空格 -cfg.DATASET.SEPARATOR = ' ' -# 忽略的像素标签值, 默认为255,一般无需改动 -cfg.DATASET.IGNORE_INDEX = 255 -# 数据增强是图像的padding值 -cfg.DATASET.PADDING_VALUE = [127.5, 127.5, 127.5] - -########################### 数据增强配置 ###################################### -# 图像镜像左右翻转 -cfg.AUG.MIRROR = True -# 图像上下翻转开关,True/False -cfg.AUG.FLIP = False -# 图像启动上下翻转的概率,0-1 -cfg.AUG.FLIP_RATIO = 0.5 -# 图像resize的固定尺寸(宽,高),非负 -cfg.AUG.FIX_RESIZE_SIZE = tuple() -# 图像resize的方式有三种: -# unpadding(固定尺寸),stepscaling(按比例resize),rangescaling(长边对齐) -cfg.AUG.AUG_METHOD = 'rangescaling' -# 图像resize方式为stepscaling,resize最小尺度,非负 -cfg.AUG.MIN_SCALE_FACTOR = 0.5 -# 图像resize方式为stepscaling,resize最大尺度,不小于MIN_SCALE_FACTOR -cfg.AUG.MAX_SCALE_FACTOR = 2.0 -# 图像resize方式为stepscaling,resize尺度范围间隔,非负 -cfg.AUG.SCALE_STEP_SIZE = 0.25 -# 图像resize方式为rangescaling,训练时长边resize的范围最小值,非负 -cfg.AUG.MIN_RESIZE_VALUE = 400 -# 图像resize方式为rangescaling,训练时长边resize的范围最大值, -# 不小于MIN_RESIZE_VALUE -cfg.AUG.MAX_RESIZE_VALUE = 600 -# 图像resize方式为rangescaling, 测试验证可视化模式下长边resize的长度, -# 在MIN_RESIZE_VALUE到MAX_RESIZE_VALUE范围内 -cfg.AUG.INF_RESIZE_VALUE = 500 - -# RichCrop数据增广开关,用于提升模型鲁棒性 -cfg.AUG.RICH_CROP.ENABLE = False -# 图像旋转最大角度,0-90 -cfg.AUG.RICH_CROP.MAX_ROTATION = 15 -# 裁取图像与原始图像面积比,0-1 -cfg.AUG.RICH_CROP.MIN_AREA_RATIO = 0.5 -# 裁取图像宽高比范围,非负 -cfg.AUG.RICH_CROP.ASPECT_RATIO = 0.33 -# 亮度调节范围,0-1 -cfg.AUG.RICH_CROP.BRIGHTNESS_JITTER_RATIO = 0.5 -# 饱和度调节范围,0-1 -cfg.AUG.RICH_CROP.SATURATION_JITTER_RATIO = 0.5 -# 对比度调节范围,0-1 -cfg.AUG.RICH_CROP.CONTRAST_JITTER_RATIO = 0.5 -# 图像模糊开关,True/False -cfg.AUG.RICH_CROP.BLUR = False -# 图像启动模糊百分比,0-1 -cfg.AUG.RICH_CROP.BLUR_RATIO = 0.1 - -########################### 训练配置 ########################################## -# 模型保存路径 -cfg.TRAIN.MODEL_SAVE_DIR = '' -# 预训练模型路径 -cfg.TRAIN.PRETRAINED_MODEL_DIR = '' -# 是否resume,继续训练 -cfg.TRAIN.RESUME_MODEL_DIR = '' -# 
是否使用多卡间同步BatchNorm均值和方差 -cfg.TRAIN.SYNC_BATCH_NORM = False -# 模型参数保存的epoch间隔数,可用来继续训练中断的模型 -cfg.TRAIN.SNAPSHOT_EPOCH = 10 - -########################### 模型优化相关配置 ################################## -# 初始学习率 -cfg.SOLVER.LR = 0.1 -# 学习率下降方法, 支持poly piecewise cosine 三种 -cfg.SOLVER.LR_POLICY = "poly" -# 优化算法, 支持SGD和Adam两种算法 -cfg.SOLVER.OPTIMIZER = "sgd" -# 动量参数 -cfg.SOLVER.MOMENTUM = 0.9 -# 二阶矩估计的指数衰减率 -cfg.SOLVER.MOMENTUM2 = 0.999 -# 学习率Poly下降指数 -cfg.SOLVER.POWER = 0.9 -# step下降指数 -cfg.SOLVER.GAMMA = 0.1 -# step下降间隔 -cfg.SOLVER.DECAY_EPOCH = [10, 20] -# 学习率权重衰减,0-1 -cfg.SOLVER.WEIGHT_DECAY = 0.00004 -# 训练开始epoch数,默认为1 -cfg.SOLVER.BEGIN_EPOCH = 1 -# 训练epoch数,正整数 -cfg.SOLVER.NUM_EPOCHS = 30 -# loss的选择,支持softmax_loss, bce_loss, dice_loss -cfg.SOLVER.LOSS = ["softmax_loss"] -# cross entropy weight, 默认为None,如果设置为'dynamic',会根据每个batch中各个类别的数目, -# 动态调整类别权重。 -# 也可以设置一个静态权重(list的方式),比如有3类,每个类别权重可以设置为[0.1, 2.0, 0.9] -cfg.SOLVER.CROSS_ENTROPY_WEIGHT = None -########################## 测试配置 ########################################### -# 测试模型路径 -cfg.TEST.TEST_MODEL = '' - -########################## 模型通用配置 ####################################### -# 模型名称, 支持deeplab, unet, icnet三种 -cfg.MODEL.MODEL_NAME = '' -# BatchNorm类型: bn、gn(group_norm) -cfg.MODEL.DEFAULT_NORM_TYPE = 'bn' -# 多路损失加权值 -cfg.MODEL.MULTI_LOSS_WEIGHT = [1.0] -# DEFAULT_NORM_TYPE为gn时group数 -cfg.MODEL.DEFAULT_GROUP_NUMBER = 32 -# 极小值, 防止分母除0溢出,一般无需改动 -cfg.MODEL.DEFAULT_EPSILON = 1e-5 -# BatchNorm动量, 一般无需改动 -cfg.MODEL.BN_MOMENTUM = 0.99 -# 是否使用FP16训练 -cfg.MODEL.FP16 = False -# 混合精度训练需对LOSS进行scale, 默认为动态scale,静态scale可以设置为512.0 -cfg.MODEL.SCALE_LOSS = "DYNAMIC" - -########################## DeepLab模型配置 #################################### -# DeepLab backbone 配置, 可选项xception_65, mobilenetv2 -cfg.MODEL.DEEPLAB.BACKBONE = "xception_65" -# DeepLab output stride -cfg.MODEL.DEEPLAB.OUTPUT_STRIDE = 16 -# MobileNet backbone scale 设置 -cfg.MODEL.DEEPLAB.DEPTH_MULTIPLIER = 1.0 -# MobileNet backbone scale 设置 -cfg.MODEL.DEEPLAB.ENCODER_WITH_ASPP = True -# MobileNet backbone scale 设置 -cfg.MODEL.DEEPLAB.ENABLE_DECODER = True -# ASPP是否使用可分离卷积 -cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV = True -# 解码器是否使用可分离卷积 -cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV = True - -########################## UNET模型配置 ####################################### -# 上采样方式, 默认为双线性插值 -cfg.MODEL.UNET.UPSAMPLE_MODE = 'bilinear' - -########################## ICNET模型配置 ###################################### -# RESNET backbone scale 设置 -cfg.MODEL.ICNET.DEPTH_MULTIPLIER = 0.5 -# RESNET 层数 设置 -cfg.MODEL.ICNET.LAYERS = 50 - -########################## PSPNET模型配置 ###################################### -# Lannet backbone name -cfg.MODEL.LANENET.BACKBONE = "vgg" - -########################## LaneNet模型配置 ###################################### - -########################## 预测部署模型配置 ################################### -# 预测保存的模型名称 -cfg.FREEZE.MODEL_FILENAME = '__model__' -# 预测保存的参数名称 -cfg.FREEZE.PARAMS_FILENAME = '__params__' -# 预测模型参数保存的路径 -cfg.FREEZE.SAVE_DIR = 'freeze_model' diff --git a/legacy/contrib/LaneNet/utils/dist_utils.py b/legacy/contrib/LaneNet/utils/dist_utils.py deleted file mode 100755 index 1189f75def..0000000000 --- a/legacy/contrib/LaneNet/utils/dist_utils.py +++ /dev/null @@ -1,93 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
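Setting `SOLVER.CROSS_ENTROPY_WEIGHT` to `'dynamic'` applies the per-batch class weighting implemented by `get_dynamic_weight()` in the deleted `model_builder.py`: `w_c = 1 / ln(count_c / total + 1.02)`, so the rare lane class receives a much larger weight than background. A numeric sketch with made-up pixel counts:

```python
import numpy as np

# w_c = 1 / ln(count_c / total + 1.02), as in the deleted get_dynamic_weight();
# the pixel counts below are made up for illustration.
counts = np.array([9500.0, 500.0])               # background vs. lane pixels in one batch
weight = 1.0 / np.log(counts / counts.sum() + 1.02)
print(weight)                                     # roughly [1.5, 14.8]: the rare class is weighted up
```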
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import paddle.fluid as fluid - - -def nccl2_prepare(args, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - - envs = args.dist_env - - t.transpile( - envs["trainer_id"], - trainers=','.join(envs["trainer_endpoints"]), - current_endpoint=envs["current_endpoint"], - startup_program=startup_prog, - program=main_prog) - - -def pserver_prepare(args, train_prog, startup_prog): - config = fluid.DistributeTranspilerConfig() - config.slice_var_up = args.split_var - t = fluid.DistributeTranspiler(config=config) - envs = args.dist_env - training_role = envs["training_role"] - - t.transpile( - envs["trainer_id"], - program=train_prog, - pservers=envs["pserver_endpoints"], - trainers=envs["num_trainers"], - sync_mode=not args.async_mode, - startup_program=startup_prog) - if training_role == "PSERVER": - pserver_program = t.get_pserver_program(envs["current_endpoint"]) - pserver_startup_program = t.get_startup_program( - envs["current_endpoint"], - pserver_program, - startup_program=startup_prog) - return pserver_program, pserver_startup_program - elif training_role == "TRAINER": - train_program = t.get_trainer_program() - return train_program, startup_prog - else: - raise ValueError( - 'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER' - ) - - -def nccl2_prepare_paddle(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - - -def prepare_for_multi_process(exe, build_strategy, train_prog): - # prepare for multi-process - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: return - - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - # NOTE(zcd): use multi processes to train the model, - # and each process use one GPU card. - startup_prog = fluid.Program() - nccl2_prepare_paddle(trainer_id, startup_prog, train_prog) - # the startup_prog are run two times, but it doesn't matter. - exe.run(startup_prog) diff --git a/legacy/contrib/LaneNet/utils/generate_tusimple_dataset.py b/legacy/contrib/LaneNet/utils/generate_tusimple_dataset.py deleted file mode 100644 index ad83c275f7..0000000000 --- a/legacy/contrib/LaneNet/utils/generate_tusimple_dataset.py +++ /dev/null @@ -1,209 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -generate tusimple training dataset -""" -import argparse -import glob -import json -import os -import os.path as ops -import shutil - -import cv2 -import numpy as np - - -def init_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--src_dir', - type=str, - help='The origin path of unzipped tusimple dataset') - - return parser.parse_args() - - -def process_json_file(json_file_path, src_dir, ori_dst_dir, binary_dst_dir, - instance_dst_dir): - - assert ops.exists(json_file_path), '{:s} not exist'.format(json_file_path) - - image_nums = len(os.listdir(os.path.join(src_dir, ori_dst_dir))) - - with open(json_file_path, 'r') as file: - for line_index, line in enumerate(file): - info_dict = json.loads(line) - - image_dir = ops.split(info_dict['raw_file'])[0] - image_dir_split = image_dir.split('/')[1:] - image_dir_split.append(ops.split(info_dict['raw_file'])[1]) - image_name = '_'.join(image_dir_split) - image_path = ops.join(src_dir, info_dict['raw_file']) - assert ops.exists(image_path), '{:s} not exist'.format(image_path) - - h_samples = info_dict['h_samples'] - lanes = info_dict['lanes'] - - image_name_new = '{:s}.png'.format( - '{:d}'.format(line_index + image_nums).zfill(4)) - - src_image = cv2.imread(image_path, cv2.IMREAD_COLOR) - dst_binary_image = np.zeros( - [src_image.shape[0], src_image.shape[1]], np.uint8) - dst_instance_image = np.zeros( - [src_image.shape[0], src_image.shape[1]], np.uint8) - - for lane_index, lane in enumerate(lanes): - assert len(h_samples) == len(lane) - lane_x = [] - lane_y = [] - for index in range(len(lane)): - if lane[index] == -2: - continue - else: - ptx = lane[index] - pty = h_samples[index] - lane_x.append(ptx) - lane_y.append(pty) - if not lane_x: - continue - lane_pts = np.vstack((lane_x, lane_y)).transpose() - lane_pts = np.array([lane_pts], np.int64) - - cv2.polylines( - dst_binary_image, - lane_pts, - isClosed=False, - color=255, - thickness=5) - cv2.polylines( - dst_instance_image, - lane_pts, - isClosed=False, - color=lane_index * 50 + 20, - thickness=5) - - dst_binary_image_path = ops.join(src_dir, binary_dst_dir, - image_name_new) - dst_instance_image_path = ops.join(src_dir, instance_dst_dir, - image_name_new) - dst_rgb_image_path = ops.join(src_dir, ori_dst_dir, image_name_new) - - cv2.imwrite(dst_binary_image_path, dst_binary_image) - cv2.imwrite(dst_instance_image_path, dst_instance_image) - cv2.imwrite(dst_rgb_image_path, src_image) - - print('Process {:s} success'.format(image_name)) - - -def gen_sample(src_dir, - b_gt_image_dir, - i_gt_image_dir, - image_dir, - phase='train', - split=False): - - label_list = [] - with open('{:s}/{}ing/{}.txt'.format(src_dir, phase, phase), 'w') as file: - - for image_name in os.listdir(b_gt_image_dir): - if not image_name.endswith('.png'): - continue - - binary_gt_image_path = ops.join(b_gt_image_dir, image_name) - instance_gt_image_path = ops.join(i_gt_image_dir, image_name) - image_path = ops.join(image_dir, image_name) - - assert ops.exists(image_path), '{:s} not exist'.format(image_path) - assert ops.exists(instance_gt_image_path), '{:s} not exist'.format( - 
instance_gt_image_path) - - b_gt_image = cv2.imread(binary_gt_image_path, cv2.IMREAD_COLOR) - i_gt_image = cv2.imread(instance_gt_image_path, cv2.IMREAD_COLOR) - image = cv2.imread(image_path, cv2.IMREAD_COLOR) - - if b_gt_image is None or image is None or i_gt_image is None: - print('image: {:s} corrupt'.format(image_name)) - continue - else: - info = '{:s} {:s} {:s}'.format(image_path, binary_gt_image_path, - instance_gt_image_path) - file.write(info + '\n') - label_list.append(info) - if phase == 'train' and split: - np.random.RandomState(0).shuffle(label_list) - val_list_len = len(label_list) // 10 - val_label_list = label_list[:val_list_len] - train_label_list = label_list[val_list_len:] - with open('{:s}/{}ing/train_part.txt'.format(src_dir, phase, phase), - 'w') as file: - for info in train_label_list: - file.write(info + '\n') - with open('{:s}/{}ing/val_part.txt'.format(src_dir, phase, phase), - 'w') as file: - for info in val_label_list: - file.write(info + '\n') - return - - -def process_tusimple_dataset(src_dir): - - traing_folder_path = ops.join(src_dir, 'training') - testing_folder_path = ops.join(src_dir, 'testing') - - os.makedirs(traing_folder_path, exist_ok=True) - os.makedirs(testing_folder_path, exist_ok=True) - - for json_label_path in glob.glob('{:s}/label*.json'.format(src_dir)): - json_label_name = ops.split(json_label_path)[1] - - shutil.copyfile(json_label_path, - ops.join(traing_folder_path, json_label_name)) - - for json_label_path in glob.glob('{:s}/test_label.json'.format(src_dir)): - json_label_name = ops.split(json_label_path)[1] - - shutil.copyfile(json_label_path, - ops.join(testing_folder_path, json_label_name)) - - train_gt_image_dir = ops.join('training', 'gt_image') - train_gt_binary_dir = ops.join('training', 'gt_binary_image') - train_gt_instance_dir = ops.join('training', 'gt_instance_image') - - test_gt_image_dir = ops.join('testing', 'gt_image') - test_gt_binary_dir = ops.join('testing', 'gt_binary_image') - test_gt_instance_dir = ops.join('testing', 'gt_instance_image') - - os.makedirs(os.path.join(src_dir, train_gt_image_dir), exist_ok=True) - os.makedirs(os.path.join(src_dir, train_gt_binary_dir), exist_ok=True) - os.makedirs(os.path.join(src_dir, train_gt_instance_dir), exist_ok=True) - - os.makedirs(os.path.join(src_dir, test_gt_image_dir), exist_ok=True) - os.makedirs(os.path.join(src_dir, test_gt_binary_dir), exist_ok=True) - os.makedirs(os.path.join(src_dir, test_gt_instance_dir), exist_ok=True) - - for json_label_path in glob.glob('{:s}/*.json'.format(traing_folder_path)): - process_json_file(json_label_path, src_dir, train_gt_image_dir, - train_gt_binary_dir, train_gt_instance_dir) - - gen_sample(src_dir, train_gt_binary_dir, train_gt_instance_dir, - train_gt_image_dir, 'train', True) - - -if __name__ == '__main__': - args = init_args() - - process_tusimple_dataset(args.src_dir) diff --git a/legacy/contrib/LaneNet/utils/lanenet_postprocess.py b/legacy/contrib/LaneNet/utils/lanenet_postprocess.py deleted file mode 100644 index 172fc1c3f0..0000000000 --- a/legacy/contrib/LaneNet/utils/lanenet_postprocess.py +++ /dev/null @@ -1,410 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
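Each line of a TuSimple label file pairs `h_samples` (the sampled row coordinates) with one x-list per lane, where `-2` marks rows the lane does not cross; the deleted `process_json_file()` turns those pairs into polylines rasterised onto the binary and instance masks. A sketch of that conversion with made-up values:

```python
import numpy as np

# Made-up TuSimple-style label data: lanes[i][j] is the x of lane i at row
# h_samples[j]; -2 means the lane has no point on that row.
h_samples = [160, 170, 180, 190]
lane = [-2, 632, 625, 617]

pts = [(x, y) for x, y in zip(lane, h_samples) if x != -2]
lane_pts = np.array([pts], np.int64)      # shape (1, 3, 2), the layout cv2.polylines expects
print(lane_pts.shape)

# In the deleted script these points are then drawn, e.g.:
#   cv2.polylines(binary_mask, lane_pts, isClosed=False, color=255, thickness=5)
#   cv2.polylines(instance_mask, lane_pts, isClosed=False, color=lane_index * 50 + 20, thickness=5)
```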
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# this code heavily base on https://github.com/MaybeShewill-CV/lanenet-lane-detection/blob/master/lanenet_model/lanenet_postprocess.py -""" -LaneNet model post process -""" -import os.path as ops -import math - -import cv2 -import time -import numpy as np -from sklearn.cluster import DBSCAN -from sklearn.preprocessing import StandardScaler - - -def _morphological_process(image, kernel_size=5): - """ - morphological process to fill the hole in the binary segmentation result - :param image: - :param kernel_size: - :return: - """ - if len(image.shape) == 3: - raise ValueError( - 'Binary segmentation result image should be a single channel image') - - if image.dtype is not np.uint8: - image = np.array(image, np.uint8) - - kernel = cv2.getStructuringElement( - shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size)) - - # close operation fille hole - closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1) - - return closing - - -def _connect_components_analysis(image): - """ - connect components analysis to remove the small components - :param image: - :return: - """ - if len(image.shape) == 3: - gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - else: - gray_image = image - - return cv2.connectedComponentsWithStats( - gray_image, connectivity=8, ltype=cv2.CV_32S) - - -class _LaneFeat(object): - """ - - """ - - def __init__(self, feat, coord, class_id=-1): - """ - lane feat object - :param feat: lane embeddng feats [feature_1, feature_2, ...] 
- :param coord: lane coordinates [x, y] - :param class_id: lane class id - """ - self._feat = feat - self._coord = coord - self._class_id = class_id - - @property - def feat(self): - return self._feat - - @feat.setter - def feat(self, value): - if not isinstance(value, np.ndarray): - value = np.array(value, dtype=np.float64) - - if value.dtype != np.float32: - value = np.array(value, dtype=np.float64) - - self._feat = value - - @property - def coord(self): - return self._coord - - @coord.setter - def coord(self, value): - if not isinstance(value, np.ndarray): - value = np.array(value) - - if value.dtype != np.int32: - value = np.array(value, dtype=np.int32) - - self._coord = value - - @property - def class_id(self): - return self._class_id - - @class_id.setter - def class_id(self, value): - if not isinstance(value, np.int64): - raise ValueError('Class id must be integer') - - self._class_id = value - - -class _LaneNetCluster(object): - """ - Instance segmentation result cluster - """ - - def __init__(self): - """ - - """ - self._color_map = [ - np.array([255, 0, 0]), - np.array([0, 255, 0]), - np.array([0, 0, 255]), - np.array([125, 125, 0]), - np.array([0, 125, 125]), - np.array([125, 0, 125]), - np.array([50, 100, 50]), - np.array([100, 50, 100]) - ] - - @staticmethod - def _embedding_feats_dbscan_cluster(embedding_image_feats): - """ - dbscan cluster - """ - db = DBSCAN(eps=0.4, min_samples=500) - - try: - features = StandardScaler().fit_transform(embedding_image_feats) - db.fit(features) - except Exception as err: - print(err) - ret = { - 'origin_features': None, - 'cluster_nums': 0, - 'db_labels': None, - 'unique_labels': None, - 'cluster_center': None - } - return ret - db_labels = db.labels_ - unique_labels = np.unique(db_labels) - num_clusters = len(unique_labels) - cluster_centers = db.components_ - - ret = { - 'origin_features': features, - 'cluster_nums': num_clusters, - 'db_labels': db_labels, - 'unique_labels': unique_labels, - 'cluster_center': cluster_centers - } - - return ret - - @staticmethod - def _get_lane_embedding_feats(binary_seg_ret, instance_seg_ret): - """ - get lane embedding features according the binary seg result - """ - - idx = np.where(binary_seg_ret == 255) - lane_embedding_feats = instance_seg_ret[idx] - - lane_coordinate = np.vstack((idx[1], idx[0])).transpose() - - assert lane_embedding_feats.shape[0] == lane_coordinate.shape[0] - - ret = { - 'lane_embedding_feats': lane_embedding_feats, - 'lane_coordinates': lane_coordinate - } - - return ret - - def apply_lane_feats_cluster(self, binary_seg_result, instance_seg_result): - """ - - :param binary_seg_result: - :param instance_seg_result: - :return: - """ - # get embedding feats and coords - get_lane_embedding_feats_result = self._get_lane_embedding_feats( - binary_seg_ret=binary_seg_result, - instance_seg_ret=instance_seg_result) - - # dbscan cluster - dbscan_cluster_result = self._embedding_feats_dbscan_cluster( - embedding_image_feats=get_lane_embedding_feats_result[ - 'lane_embedding_feats']) - - mask = np.zeros( - shape=[binary_seg_result.shape[0], binary_seg_result.shape[1], 3], - dtype=np.uint8) - db_labels = dbscan_cluster_result['db_labels'] - unique_labels = dbscan_cluster_result['unique_labels'] - coord = get_lane_embedding_feats_result['lane_coordinates'] - - if db_labels is None: - return None, None - - lane_coords = [] - - for index, label in enumerate(unique_labels.tolist()): - if label == -1: - continue - idx = np.where(db_labels == label) - pix_coord_idx = tuple((coord[idx][:, 1], 
coord[idx][:, 0])) - mask[pix_coord_idx] = self._color_map[index] - lane_coords.append(coord[idx]) - - return mask, lane_coords - - -class LaneNetPostProcessor(object): - """ - lanenet post process for lane generation - """ - - def __init__(self, ipm_remap_file_path='./utils/tusimple_ipm_remap.yml'): - """ - convert front car view to bird view - """ - assert ops.exists(ipm_remap_file_path), '{:s} not exist'.format( - ipm_remap_file_path) - - self._cluster = _LaneNetCluster() - self._ipm_remap_file_path = ipm_remap_file_path - - remap_file_load_ret = self._load_remap_matrix() - self._remap_to_ipm_x = remap_file_load_ret['remap_to_ipm_x'] - self._remap_to_ipm_y = remap_file_load_ret['remap_to_ipm_y'] - - self._color_map = [ - np.array([255, 0, 0]), - np.array([0, 255, 0]), - np.array([0, 0, 255]), - np.array([125, 125, 0]), - np.array([0, 125, 125]), - np.array([125, 0, 125]), - np.array([50, 100, 50]), - np.array([100, 50, 100]) - ] - - def _load_remap_matrix(self): - fs = cv2.FileStorage(self._ipm_remap_file_path, cv2.FILE_STORAGE_READ) - - remap_to_ipm_x = fs.getNode('remap_ipm_x').mat() - remap_to_ipm_y = fs.getNode('remap_ipm_y').mat() - - ret = { - 'remap_to_ipm_x': remap_to_ipm_x, - 'remap_to_ipm_y': remap_to_ipm_y, - } - - fs.release() - - return ret - - def postprocess(self, - binary_seg_result, - instance_seg_result=None, - min_area_threshold=100, - source_image=None, - data_source='tusimple'): - - # convert binary_seg_result - binary_seg_result = np.array(binary_seg_result * 255, dtype=np.uint8) - # apply image morphology operation to fill in the hold and reduce the small area - morphological_ret = _morphological_process( - binary_seg_result, kernel_size=5) - connect_components_analysis_ret = _connect_components_analysis( - image=morphological_ret) - - labels = connect_components_analysis_ret[1] - stats = connect_components_analysis_ret[2] - for index, stat in enumerate(stats): - if stat[4] <= min_area_threshold: - idx = np.where(labels == index) - morphological_ret[idx] = 0 - - # apply embedding features cluster - mask_image, lane_coords = self._cluster.apply_lane_feats_cluster( - binary_seg_result=morphological_ret, - instance_seg_result=instance_seg_result) - - if mask_image is None: - return { - 'mask_image': None, - 'fit_params': None, - 'source_image': None, - } - - # lane line fit - fit_params = [] - src_lane_pts = [] - for lane_index, coords in enumerate(lane_coords): - if data_source == 'tusimple': - tmp_mask = np.zeros(shape=(720, 1280), dtype=np.uint8) - tmp_mask[tuple((np.int_(coords[:, 1] * 720 / 256), - np.int_(coords[:, 0] * 1280 / 512)))] = 255 - else: - raise ValueError('Wrong data source now only support tusimple') - tmp_ipm_mask = cv2.remap( - tmp_mask, - self._remap_to_ipm_x, - self._remap_to_ipm_y, - interpolation=cv2.INTER_NEAREST) - nonzero_y = np.array(tmp_ipm_mask.nonzero()[0]) - nonzero_x = np.array(tmp_ipm_mask.nonzero()[1]) - - fit_param = np.polyfit(nonzero_y, nonzero_x, 2) - fit_params.append(fit_param) - - [ipm_image_height, ipm_image_width] = tmp_ipm_mask.shape - plot_y = np.linspace(10, ipm_image_height, ipm_image_height - 10) - fit_x = fit_param[0] * plot_y**2 + fit_param[ - 1] * plot_y + fit_param[2] - - lane_pts = [] - for index in range(0, plot_y.shape[0], 5): - src_x = self._remap_to_ipm_x[ - int(plot_y[index]), - int(np.clip(fit_x[index], 0, ipm_image_width - 1))] - if src_x <= 0: - continue - src_y = self._remap_to_ipm_y[ - int(plot_y[index]), - int(np.clip(fit_x[index], 0, ipm_image_width - 1))] - src_y = src_y if src_y > 0 else 0 - - 
lane_pts.append([src_x, src_y]) - - src_lane_pts.append(lane_pts) - - # tusimple test data sample point along y axis every 10 pixels - source_image_width = source_image.shape[1] - for index, single_lane_pts in enumerate(src_lane_pts): - single_lane_pt_x = np.array(single_lane_pts, dtype=np.float32)[:, 0] - single_lane_pt_y = np.array(single_lane_pts, dtype=np.float32)[:, 1] - if data_source == 'tusimple': - start_plot_y = 240 - end_plot_y = 720 - else: - raise ValueError('Wrong data source now only support tusimple') - step = int(math.floor((end_plot_y - start_plot_y) / 10)) - for plot_y in np.linspace(start_plot_y, end_plot_y, step): - diff = single_lane_pt_y - plot_y - fake_diff_bigger_than_zero = diff.copy() - fake_diff_smaller_than_zero = diff.copy() - fake_diff_bigger_than_zero[np.where(diff <= 0)] = float('inf') - fake_diff_smaller_than_zero[np.where(diff > 0)] = float('-inf') - idx_low = np.argmax(fake_diff_smaller_than_zero) - idx_high = np.argmin(fake_diff_bigger_than_zero) - - previous_src_pt_x = single_lane_pt_x[idx_low] - previous_src_pt_y = single_lane_pt_y[idx_low] - last_src_pt_x = single_lane_pt_x[idx_high] - last_src_pt_y = single_lane_pt_y[idx_high] - - if previous_src_pt_y < start_plot_y or last_src_pt_y < start_plot_y or \ - fake_diff_smaller_than_zero[idx_low] == float('-inf') or \ - fake_diff_bigger_than_zero[idx_high] == float('inf'): - continue - - interpolation_src_pt_x = (abs(previous_src_pt_y - plot_y) * previous_src_pt_x + - abs(last_src_pt_y - plot_y) * last_src_pt_x) / \ - (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y)) - interpolation_src_pt_y = (abs(previous_src_pt_y - plot_y) * previous_src_pt_y + - abs(last_src_pt_y - plot_y) * last_src_pt_y) / \ - (abs(previous_src_pt_y - plot_y) + abs(last_src_pt_y - plot_y)) - - if interpolation_src_pt_x > source_image_width or interpolation_src_pt_x < 10: - continue - - lane_color = self._color_map[index].tolist() - cv2.circle( - source_image, - (int(interpolation_src_pt_x), int(interpolation_src_pt_y)), - 5, lane_color, -1) - ret = { - 'mask_image': mask_image, - 'fit_params': fit_params, - 'source_image': source_image, - } - return ret diff --git a/legacy/contrib/LaneNet/utils/load_model_utils.py b/legacy/contrib/LaneNet/utils/load_model_utils.py deleted file mode 100644 index 012cc680e9..0000000000 --- a/legacy/contrib/LaneNet/utils/load_model_utils.py +++ /dev/null @@ -1,126 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import os.path as osp - -import six -import numpy as np - - -def parse_param_file(param_file, return_shape=True): - from paddle.fluid.proto.framework_pb2 import VarType - f = open(param_file, 'rb') - version = np.fromstring(f.read(4), dtype='int32') - lod_level = np.fromstring(f.read(8), dtype='int64') - for i in range(int(lod_level)): - _size = np.fromstring(f.read(8), dtype='int64') - _ = f.read(_size) - version = np.fromstring(f.read(4), dtype='int32') - tensor_desc = VarType.TensorDesc() - tensor_desc_size = np.fromstring(f.read(4), dtype='int32') - tensor_desc.ParseFromString(f.read(int(tensor_desc_size))) - tensor_shape = tuple(tensor_desc.dims) - if return_shape: - f.close() - return tuple(tensor_desc.dims) - if tensor_desc.data_type != 5: - raise Exception( - "Unexpected data type while parse {}".format(param_file)) - data_size = 4 - for i in range(len(tensor_shape)): - data_size *= tensor_shape[i] - weight = np.fromstring(f.read(data_size), dtype='float32') - f.close() - return np.reshape(weight, tensor_shape) - - -def load_pdparams(exe, main_prog, model_dir): - import paddle.fluid as fluid - from paddle.fluid.proto.framework_pb2 import VarType - from paddle.fluid.framework import Program - - vars_to_load = list() - vars_not_load = list() - import pickle - with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: - params_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - unused_vars = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if var.name not in params_dict: - print("{} is not in saved model".format(var.name)) - vars_not_load.append(var.name) - continue - if var.shape != params_dict[var.name].shape: - unused_vars.append(var.name) - vars_not_load.append(var.name) - print( - "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})" - .format(var.name, params_dict[var.name].shape, var.shape)) - continue - vars_to_load.append(var) - for var_name in unused_vars: - del params_dict[var_name] - fluid.io.set_program_state(main_prog, params_dict) - - if len(vars_to_load) == 0: - print( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" 
- ) - else: - print("There are {}/{} varaibles in {} are loaded.".format( - len(vars_to_load), - len(vars_to_load) + len(vars_not_load), model_dir)) - - -def load_pretrained_weights(exe, main_prog, weights_dir): - if not osp.exists(weights_dir): - raise Exception("Path {} not exists.".format(weights_dir)) - if osp.exists(osp.join(weights_dir, "model.pdparams")): - return load_pdparams(exe, main_prog, weights_dir) - import paddle.fluid as fluid - vars_to_load = list() - vars_not_load = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if not osp.exists(osp.join(weights_dir, var.name)): - print("[SKIP] Pretrained weight {}/{} doesn't exist".format( - weights_dir, var.name)) - vars_not_load.append(var) - continue - pretrained_shape = parse_param_file(osp.join(weights_dir, var.name)) - actual_shape = tuple(var.shape) - if pretrained_shape != actual_shape: - print( - "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})" - .format(weights_dir, var.name, pretrained_shape, actual_shape)) - vars_not_load.append(var) - continue - vars_to_load.append(var) - params_dict = fluid.io.load_program_state( - weights_dir, var_list=vars_to_load) - fluid.io.set_program_state(main_prog, params_dict) - if len(vars_to_load) == 0: - print( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" - ) - else: - print("There are {}/{} varaibles in {} are loaded.".format( - len(vars_to_load), - len(vars_to_load) + len(vars_not_load), weights_dir)) diff --git a/legacy/contrib/LaneNet/vis.py b/legacy/contrib/LaneNet/vis.py deleted file mode 100644 index a037e8451a..0000000000 --- a/legacy/contrib/LaneNet/vis.py +++ /dev/null @@ -1,215 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys - -cur_path = os.path.abspath(os.path.dirname(__file__)) -root_path = os.path.split(os.path.split(cur_path)[0])[0] -SEG_PATH = os.path.join(cur_path, "../../../") -sys.path.append(SEG_PATH) -sys.path.append(root_path) - -import matplotlib -matplotlib.use('Agg') -import time -import argparse -import pprint -import cv2 -import numpy as np -import paddle.fluid as fluid - -from utils.config import cfg -from reader import LaneNetDataset -from models.model_builder import build_model -from models.model_builder import ModelPhase -from utils import lanenet_postprocess -import matplotlib.pyplot as plt - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddeSeg visualization tools') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', dest='use_gpu', help='Use gpu or cpu', action='store_true') - parser.add_argument( - '--vis_dir', - dest='vis_dir', - help='visual save dir', - type=str, - default='visual') - parser.add_argument( - '--also_save_raw_results', - dest='also_save_raw_results', - help='whether to save raw result', - action='store_true') - parser.add_argument( - '--local_test', - dest='local_test', - help='if in local test mode, only visualize 5 images for testing', - action='store_true') - parser.add_argument( - 'opts', - help='See config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def makedirs(directory): - if not os.path.exists(directory): - os.makedirs(directory) - - -def to_png_fn(fn, name=""): - """ - Append png as filename postfix - """ - directory, filename = os.path.split(fn) - basename, ext = os.path.splitext(filename) - - return basename + name + ".png" - - -def minmax_scale(input_arr): - min_val = np.min(input_arr) - max_val = np.max(input_arr) - - output_arr = (input_arr - min_val) * 255.0 / (max_val - min_val) - - return output_arr - - -def visualize(cfg, - vis_file_list=None, - use_gpu=False, - vis_dir="visual", - also_save_raw_results=False, - ckpt_dir=None, - log_writer=None, - local_test=False, - **kwargs): - if vis_file_list is None: - vis_file_list = cfg.DATASET.TEST_FILE_LIST - - dataset = LaneNetDataset( - file_list=vis_file_list, - mode=ModelPhase.VISUAL, - shuffle=True, - data_dir=cfg.DATASET.DATA_DIR) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) - # Clone forward graph - test_prog = test_prog.clone(for_test=True) - - # Get device environment - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - if ckpt_dir is not None: - print('load test model:', ckpt_dir) - try: - fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) - except: - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - save_dir = os.path.join(vis_dir, 'visual_results') - makedirs(save_dir) - if also_save_raw_results: - raw_save_dir = os.path.join(vis_dir, 'raw_results') - makedirs(raw_save_dir) - - fetch_list = [pred.name, logit.name] - test_reader = 
dataset.batch(dataset.generator, batch_size=1, is_test=True) - - postprocessor = lanenet_postprocess.LaneNetPostProcessor() - for imgs, grts, grts_instance, img_names, valid_shapes, org_imgs in test_reader: - segLogits, emLogits = exe.run( - program=test_prog, - feed={'image': imgs}, - fetch_list=fetch_list, - return_numpy=True) - num_imgs = segLogits.shape[0] - - for i in range(num_imgs): - gt_image = org_imgs[i] - binary_seg_image, instance_seg_image = segLogits[i].squeeze( - -1), emLogits[i].transpose((1, 2, 0)) - - postprocess_result = postprocessor.postprocess( - binary_seg_result=binary_seg_image, - instance_seg_result=instance_seg_image, - source_image=gt_image) - pred_binary_fn = os.path.join( - save_dir, to_png_fn(img_names[i], name='_pred_binary')) - pred_lane_fn = os.path.join( - save_dir, to_png_fn(img_names[i], name='_pred_lane')) - pred_instance_fn = os.path.join( - save_dir, to_png_fn(img_names[i], name='_pred_instance')) - dirname = os.path.dirname(pred_binary_fn) - - makedirs(dirname) - mask_image = postprocess_result['mask_image'] - for i in range(4): - instance_seg_image[:, :, i] = minmax_scale( - instance_seg_image[:, :, i]) - embedding_image = np.array(instance_seg_image).astype(np.uint8) - - plt.figure('mask_image') - plt.imshow(mask_image[:, :, (2, 1, 0)]) - plt.figure('src_image') - plt.imshow(gt_image[:, :, (2, 1, 0)]) - plt.figure('instance_image') - plt.imshow(embedding_image[:, :, (2, 1, 0)]) - plt.figure('binary_image') - plt.imshow(binary_seg_image * 255, cmap='gray') - plt.show() - - cv2.imwrite(pred_binary_fn, - np.array(binary_seg_image * 255).astype(np.uint8)) - cv2.imwrite(pred_lane_fn, postprocess_result['source_image']) - cv2.imwrite(pred_instance_fn, mask_image) - print(pred_lane_fn, 'saved!') - - -if __name__ == '__main__': - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - visualize(cfg, **args.__dict__) diff --git a/legacy/contrib/MechanicalIndustryMeter/download_mini_mechanical_industry_meter.py b/legacy/contrib/MechanicalIndustryMeter/download_mini_mechanical_industry_meter.py deleted file mode 100644 index fb98fdf9c4..0000000000 --- a/legacy/contrib/MechanicalIndustryMeter/download_mini_mechanical_industry_meter.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_deepglobe_road_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/mini_mechanical_industry_meter_data.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_deepglobe_road_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/contrib/MechanicalIndustryMeter/download_unet_mechanical_industry_meter.py b/legacy/contrib/MechanicalIndustryMeter/download_unet_mechanical_industry_meter.py deleted file mode 100644 index 32b2a8aea3..0000000000 --- a/legacy/contrib/MechanicalIndustryMeter/download_unet_mechanical_industry_meter.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - -if __name__ == "__main__": - download_file_and_uncompress( - url= - 'https://paddleseg.bj.bcebos.com/models/unet_mechanical_industry_meter.tar', - savepath=LOCAL_PATH, - extrapath=LOCAL_PATH) - - print("Pretrained Model download success!") diff --git a/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.JPG b/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.JPG deleted file mode 100644 index 5d9869d29a..0000000000 Binary files a/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.JPG and /dev/null differ diff --git a/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.png b/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.png deleted file mode 100644 index 440eb76d07..0000000000 Binary files a/legacy/contrib/MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.png and /dev/null differ diff --git a/legacy/contrib/MechanicalIndustryMeter/unet_mechanical_meter.yaml b/legacy/contrib/MechanicalIndustryMeter/unet_mechanical_meter.yaml deleted file mode 100644 index a23dd84610..0000000000 --- a/legacy/contrib/MechanicalIndustryMeter/unet_mechanical_meter.yaml +++ /dev/null @@ -1,52 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1537) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for 
stepscaling - MIRROR: True - RICH_CROP: - ENABLE: False - -BATCH_SIZE: 2 -MEAN: [0.5, 0.5, 0.5] -STD: [0.5, 0.5, 0.5] - -DATALOADER: - BUF_SIZE: 256 - NUM_WORKERS: 4 -DATASET: - DATA_DIR: "./contrib/MechanicalIndustryMeter/mini_mechanical_industry_meter_data/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 5 - TEST_FILE_LIST: "./contrib/MechanicalIndustryMeter/mini_mechanical_industry_meter_data/val_mini.txt" - TEST_TOTAL_IMAGES: 8 - TRAIN_FILE_LIST: "./contrib/MechanicalIndustryMeter/mini_mechanical_industry_meter_data/train_mini.txt" - TRAIN_TOTAL_IMAGES: 64 - VAL_FILE_LIST: "./contrib/MechanicalIndustryMeter/mini_mechanical_industry_meter_data/val_mini.txt" - VAL_TOTAL_IMAGES: 8 - SEPARATOR: "|" - IGNORE_INDEX: 255 - -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - MODEL_NAME: "unet" - DEFAULT_NORM_TYPE: "bn" -TEST: - TEST_MODEL: "./saved_model/unet_mechanical_meter/final/" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/unet_mechanical_meter/" - PRETRAINED_MODEL_DIR: "./pretrained_model/unet_bn_coco/" - SNAPSHOT_EPOCH: 10 -SOLVER: - NUM_EPOCHS: 100 - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "sgd" diff --git a/legacy/contrib/NeurIPS_SN7/Dockerfile b/legacy/contrib/NeurIPS_SN7/Dockerfile deleted file mode 100644 index 7fd8852886..0000000000 --- a/legacy/contrib/NeurIPS_SN7/Dockerfile +++ /dev/null @@ -1,104 +0,0 @@ -# Adapted from: https://raw.githubusercontent.com/CosmiQ/solaris/master/docker/gpu/Dockerfile -# docker build -t sn7_baseline_image /path_to_docker/ -# NV_GPU=0 nvidia-docker run -it -v /local_data:/local_data -v --rm -ti --ipc=host --name sn7_baseline_gpu0 sn7_baseline_image - -FROM nvidia/cuda:10.2-devel-ubuntu16.04 -LABEL maintainer="avanetten " - -ENV CUDNN_VERSION 7.6.0.64 -LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" -ARG solaris_branch='master' - -# prep apt-get and cudnn -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-utils \ - libcudnn7=$CUDNN_VERSION-1+cuda10.0 \ - libcudnn7-dev=$CUDNN_VERSION-1+cuda10.0 && \ - apt-mark hold libcudnn7 && \ - rm -rf /var/lib/apt/lists/* - -# install requirements -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - bc \ - bzip2 \ - ca-certificates \ - curl \ - git \ - libgdal-dev \ - libssl-dev \ - libffi-dev \ - libncurses-dev \ - libgl1 \ - jq \ - nfs-common \ - parallel \ - python-dev \ - python-pip \ - python-wheel \ - python-setuptools \ - unzip \ - vim \ - wget \ - build-essential \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -SHELL ["/bin/bash", "-c"] -ENV PATH /opt/conda/bin:$PATH - -# install anaconda -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O ~/miniconda.sh && \ - /bin/bash ~/miniconda.sh -b -p /opt/conda && \ - rm ~/miniconda.sh && \ - /opt/conda/bin/conda clean -tipsy && \ - ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ - echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc - -# prepend pytorch and conda-forge before default channel -RUN conda update conda && \ - conda config --prepend channels conda-forge && \ - conda config --prepend channels pytorch - -# get dev version of solaris and create conda environment based on its env file -WORKDIR /tmp/ -RUN git clone https://github.com/cosmiq/solaris.git && \ - cd solaris && \ - git checkout ${solaris_branch} && \ - conda env create -f environment-gpu.yml -ENV PATH /opt/conda/envs/solaris/bin:$PATH - -RUN cd solaris && pip install . 
- -# install various conda dependencies into the space_base environment -RUN conda install -n solaris \ - jupyter \ - jupyterlab \ - ipykernel - -# add a jupyter kernel for the conda environment in case it's wanted -RUN source activate solaris && python -m ipykernel.kernelspec \ - --name solaris --display-name solaris - -# ensure solaris is activated -# RUN conda activate solaris - -# Need imagecodecs for Planet files -RUN pip install imagecodecs - -# Paddle -RUN python3 -m pip install paddlepaddle-gpu==1.8.2.post107 -i https://mirror.baidu.com/pypi/simple -RUN pip install scipy==1.3.2 - -WORKDIR /work -COPY . /work/ -RUN chmod 777 train.sh -RUN chmod 777 test.sh -RUN chmod 777 download.sh -RUN chmod 777 pretrained_model/download_model.py - -# open ports for jupyterlab and tensorboard -EXPOSE 8888 6006 - -RUN [ "/bin/bash" ] diff --git a/legacy/contrib/NeurIPS_SN7/README.md b/legacy/contrib/NeurIPS_SN7/README.md deleted file mode 100644 index 9a1cb8ab29..0000000000 --- a/legacy/contrib/NeurIPS_SN7/README.md +++ /dev/null @@ -1,39 +0,0 @@ -## The Champion Model of Multi-Temporal Urban Development Challenge at NeurIPS2020 - -### 1. Approach - -For the detailed approaches, please refer to the [document](./approach.pdf). - -### 2. Dataset - -Download data from one of following links: - -option 1: The official dataset link: https://spacenet.ai/sn7-challenge/ - -option 2: The BaiduYun [link](https://pan.baidu.com/s/1WM0IHup5Uau7FZGQf7rzdA), the access code: 17th - -### 3. Deployment Guide - -- Build docker image: -``` -docker build -t . -``` -- Run docker: -``` -docker run -v :/data:ro -v :/wdata -it -``` -Please see https://github.com/topcoderinc/marathon-docker-template/tree/master/data-plus-code-style - -### 4. Final Verification - -- Train: -``` -./train.sh /data/train -``` -- Test: -``` -./test.sh /data/test/ solution.csv -``` - -### 5. 
Team Members -- Xiang Long, Honghui Zheng, Yan Peng diff --git a/legacy/contrib/NeurIPS_SN7/approach.pdf b/legacy/contrib/NeurIPS_SN7/approach.pdf deleted file mode 100644 index 5460dfece0..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/approach.pdf and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/data_lib.py b/legacy/contrib/NeurIPS_SN7/data_lib.py deleted file mode 100644 index d4b1399d0f..0000000000 --- a/legacy/contrib/NeurIPS_SN7/data_lib.py +++ /dev/null @@ -1,329 +0,0 @@ -import os -import sys -import multiprocessing -import warnings - -warnings.filterwarnings('ignore') - -import pandas as pd -import skimage -import gdal -import numpy as np -import cv2 -from PIL import Image - -Image.MAX_IMAGE_PIXELS = None - -import solaris as sol -from solaris.raster.image import create_multiband_geotiff -from solaris.utils.core import _check_gdf_load - -module_path = os.path.abspath(os.path.join('./src/')) -if module_path not in sys.path: - sys.path.append(module_path) -from solaris.preproc.image import LoadImage, SaveImage, Resize -from sn7_baseline_prep_funcs import map_wrapper, make_geojsons_and_masks - -# ###### common configs for divide images ###### - -# pre resize -pre_height = None # 3072 -pre_width = None # 3072 -# final output size -target_height = 512 -target_width = 512 -# stride -height_stride = 512 -width_stride = 512 -# padding, always the same as ignore pixel -padding_pixel = 255 - -# ########################### - - -def get_color_map_list(num_classes): - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j, lab = 0, i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - return color_map - - -def compose_img(divide_img_ls, compose_img_dir, ext=".png"): - im_list = sorted(divide_img_ls) - - last_file = os.path.split(im_list[-1])[-1] - file_name = '_'.join(last_file.split('.')[0].split('_')[:-2]) - yy, xx = last_file.split('.')[0].split('_')[-2:] - rows = int(yy) // height_stride + 1 - cols = int(xx) // width_stride + 1 - - image = Image.new('P', - (cols * target_width, rows * target_height)) # 创建一个新图 - for y in range(rows): - for x in range(cols): - patch = Image.open(im_list[cols * y + x]) - image.paste(patch, (x * target_width, y * target_height)) - - color_map = get_color_map_list(256) - image.putpalette(color_map) - image.save(os.path.join(compose_img_dir, file_name + ext)) - - -def compose_arr(divide_img_ls, compose_img_dir, ext=".npy"): - """ - Core function of putting results into one. - """ - im_list = sorted(divide_img_ls) - - last_file = os.path.split(im_list[-1])[-1] - file_name = '_'.join(last_file.split('.')[0].split('_')[:-2]) - yy, xx = last_file.split('.')[0].split('_')[-2:] - rows = int(yy) // height_stride + 1 - cols = int(xx) // width_stride + 1 - - image = np.zeros( - (cols * target_width, rows * target_height), dtype=np.float32) * 255 - for y in range(rows): - for x in range(cols): - patch = np.load(im_list[cols * y + x]) - image[y * target_height:(y + 1) * target_height, x * - target_width:(x + 1) * target_width] = patch - - np.save(os.path.join(compose_img_dir, file_name + ext), image) - - -def divide_img(img_file, save_dir='divide_imgs', inter_type=cv2.INTER_LINEAR): - """ - Core function of dividing images. 
- """ - _, filename = os.path.split(img_file) - basename, ext = os.path.splitext(filename) - - img = np.array(Image.open(img_file)) - if pre_height is not None and pre_width is not None: - if 1023 in img.shape: - offset_h = 1 if img.shape[0] == 1023 else 0 - offset_w = 1 if img.shape[1] == 1023 else 0 - img = cv2.copyMakeBorder( - img, 0, offset_h, 0, offset_w, cv2.BORDER_CONSTANT, value=255) - img = cv2.resize(img, (pre_height, pre_width), interpolation=inter_type) - - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - src_im_height = img.shape[0] - src_im_width = img.shape[1] - - x1, y1, idx = 0, 0, 0 - while y1 < src_im_height: - y2 = y1 + target_height - while x1 < src_im_width: - x2 = x1 + target_width - img_crop = img[y1:y2, x1:x2] - if y2 > src_im_height or x2 > src_im_width: - pad_bottom = y2 - src_im_height if y2 > src_im_height else 0 - pad_right = x2 - src_im_width if x2 > src_im_width else 0 - img_crop = cv2.copyMakeBorder( - img_crop, - 0, - pad_bottom, - 0, - pad_right, - cv2.BORDER_CONSTANT, - value=padding_pixel) - save_file = os.path.join(save_dir, - basename + "_%05d_%05d" % (y1, x1) + ext) - Image.fromarray(img_crop).save(save_file) - x1 += width_stride - idx += 1 - x1 = 0 - y1 += height_stride - - -def divide(root): - """ - Considering the training speed, we divide the image into small images. - """ - locas = [os.path.join(root, x) for x in os.listdir(root)] - for loca in locas: - if not os.path.isdir(os.path.join(root, loca)): - continue - print(loca) - img_path = os.path.join(loca, "images_masked_3x") - imgs = [os.path.join(img_path, x) for x in os.listdir(img_path)] - for img in imgs: - divide_img(img, os.path.join(loca, "images_masked_3x_divide")) - - grt_path = os.path.join(loca, "masks_3x") - if not os.path.exists(grt_path): - continue - grts = [os.path.join(grt_path, x) for x in os.listdir(grt_path)] - for grt in grts: - divide_img(grt, os.path.join(loca, "masks_3x_divide"), - cv2.INTER_NEAREST) - - -def compose(root): - """ - Because the images are cut into small parts, the output results are also small parts. - We need to put the output results into a large one. - """ - dst = root + "_compose" - if not os.path.exists(dst): - os.makedirs(dst) - dic = {} - img_files = [os.path.join(root, x) for x in os.listdir(root)] - for img_file in img_files: - key = '_'.join(img_file.split('/')[-1].split('_')[2:9]) - if key not in dic: - dic[key] = [img_file] - else: - dic[key].append(img_file) - - for k, v in dic.items(): - print(k) - compose_arr(v, dst) - - -def enlarge_3x(root): - """ - Enlarge the original images by 3 times. - """ - aois = [os.path.join(root, x) for x in os.listdir(root)] - for aoi in aois: - if not os.path.isdir(os.path.join(root, aoi)): - continue - print("enlarge 3x:", aoi) - images_masked = os.path.join(aoi, "images_masked") - img_files = [ - os.path.join(images_masked, x) for x in os.listdir(images_masked) - ] - images_masked_3x = os.path.join(aoi, "images_masked_3x") - if not os.path.exists(images_masked_3x): - os.makedirs(images_masked_3x) - for img_file in img_files: - lo = LoadImage(img_file) - img = lo.load() - _, height, width = img.data.shape - - re = Resize(height * 3, width * 3) - img = re.resize(img, height * 3, width * 3) - assert img.data.shape[1] == height * 3 - assert img.data.shape[2] == width * 3 - - sa = SaveImage( - img_file.replace("images_masked", "images_masked_3x")) - sa.transform(img) - - -def create_label(root, f3x=True): - """ - Create label according to given json file. 
- If f3x is True, it will create label that enlarged 3 times than original size. - """ - aois = os.listdir(root) - - n_threads = 10 - make_fbc = False - - input_args = [] - for i, aoi in enumerate(aois): - if not os.path.isdir(os.path.join(root, aoi)): - continue - print(i, "aoi:", aoi) - im_dir = os.path.join(root, aoi, - 'images_masked_3x/' if f3x else 'images_masked/') - json_dir = os.path.join(root, aoi, 'labels_match/') - out_dir_mask = os.path.join(root, aoi, 'masks_3x/' if f3x else 'masks/') - out_dir_mask_fbc = os.path.join( - root, aoi, 'masks_fbc_3x/' if f3x else 'masks_fbc/') - os.makedirs(out_dir_mask, exist_ok=True) - if make_fbc: - os.makedirs(out_dir_mask_fbc, exist_ok=True) - - json_files = sorted([ - f for f in os.listdir(os.path.join(json_dir)) - if f.endswith('Buildings.geojson') - and os.path.exists(os.path.join(json_dir, f)) - ]) - for j, f in enumerate(json_files): - # print(i, j, f) - name_root = f.split('.')[0] - json_path = os.path.join(json_dir, f) - image_path = os.path.join(im_dir, name_root + '.tif').replace( - 'labels', 'images').replace('_Buildings', '') - output_path_mask = os.path.join(out_dir_mask, name_root + '.tif') - if make_fbc: - output_path_mask_fbc = os.path.join(out_dir_mask_fbc, - name_root + '.tif') - else: - output_path_mask_fbc = None - - if not os.path.exists(output_path_mask): - input_args.append([ - make_geojsons_and_masks, name_root, image_path, json_path, - output_path_mask, output_path_mask_fbc - ]) - - print("len input_args", len(input_args)) - print("Execute...\n") - with multiprocessing.Pool(n_threads) as pool: - pool.map(map_wrapper, input_args) - - -def create_trainval_list(root): - """ - Create train list and validation list. - Aois in val_aois below are chosen to validation aois. - """ - val_aois = set([ - "L15-0387E-1276N_1549_3087_13", "L15-1276E-1107N_5105_3761_13", - "L15-1015E-1062N_4061_3941_13", "L15-1615E-1206N_6460_3366_13", - "L15-1438E-1134N_5753_3655_13", "L15-0632E-0892N_2528_4620_13", - "L15-0566E-1185N_2265_3451_13", "L15-1200E-0847N_4802_4803_13", - "L15-1848E-0793N_7394_5018_13", "L15-1690E-1211N_6763_3346_13" - ]) - fw1 = open("train_list.txt", 'w') - fw2 = open("val_list.txt", 'w') - for aoi in os.listdir(root): - if not os.path.isdir(os.path.join(root, aoi)): - continue - img_path = os.path.join(root, aoi, "images_masked_3x_divide") - grt_path = os.path.join(root, aoi, "masks_3x_divide") - for grt_file in os.listdir(grt_path): - img_file = grt_file.replace("_Buildings", '') - if os.path.isfile(os.path.join(img_path, img_file)): - if aoi in val_aois: - fw2.write( - os.path.join(aoi, "images_masked_3x_divide", img_file) + - ' ' + os.path.join(aoi, "masks_3x_divide", grt_file) + - '\n') - else: - fw1.write( - os.path.join(aoi, "images_masked_3x_divide", img_file) + - ' ' + os.path.join(aoi, "masks_3x_divide", grt_file) + - '\n') - fw1.close() - fw2.close() - - -def create_test_list(root): - """ - Create test list. 
- """ - fw = open("test_list.txt", 'w') - for aoi in os.listdir(root): - if not os.path.isdir(os.path.join(root, aoi)): - continue - img_path = os.path.join(root, aoi, "images_masked_3x_divide") - for img_file in os.listdir(img_path): - fw.write( - os.path.join(aoi, "images_masked_3x_divide", img_file) + - " dummy.tif\n") - fw.close() diff --git a/legacy/contrib/NeurIPS_SN7/download.sh b/legacy/contrib/NeurIPS_SN7/download.sh deleted file mode 100644 index 7d82dc5868..0000000000 --- a/legacy/contrib/NeurIPS_SN7/download.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -mkdir -p /wdata/saved_model/hrnet/ -wget https://www.dropbox.com/s/krtl5tmrkf4qv56/prefix.tar.gz?dl=1 -O prefix.tar.gz -tar -zxf prefix.tar.gz -cp -r prefix /wdata/saved_model/hrnet/best_model diff --git a/legacy/contrib/NeurIPS_SN7/dummy.tif b/legacy/contrib/NeurIPS_SN7/dummy.tif deleted file mode 100644 index 5582db1500..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/dummy.tif and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f1.png b/legacy/contrib/NeurIPS_SN7/figs/f1.png deleted file mode 100644 index 957bb41a72..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f1.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f2.png b/legacy/contrib/NeurIPS_SN7/figs/f2.png deleted file mode 100644 index 910516a29f..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f2.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f3.png b/legacy/contrib/NeurIPS_SN7/figs/f3.png deleted file mode 100644 index f10b83212d..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f3.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f4.png b/legacy/contrib/NeurIPS_SN7/figs/f4.png deleted file mode 100644 index eea32b4dce..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f4.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f5.png b/legacy/contrib/NeurIPS_SN7/figs/f5.png deleted file mode 100644 index 7c76ecbb68..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f5.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f6.png b/legacy/contrib/NeurIPS_SN7/figs/f6.png deleted file mode 100644 index 00083eeee7..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f6.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f7.png b/legacy/contrib/NeurIPS_SN7/figs/f7.png deleted file mode 100644 index 0a795633cc..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f7.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/figs/f8.png b/legacy/contrib/NeurIPS_SN7/figs/f8.png deleted file mode 100644 index 3307a4a6e1..0000000000 Binary files a/legacy/contrib/NeurIPS_SN7/figs/f8.png and /dev/null differ diff --git a/legacy/contrib/NeurIPS_SN7/hrnet_sn7.yaml b/legacy/contrib/NeurIPS_SN7/hrnet_sn7.yaml deleted file mode 100644 index f816796494..0000000000 --- a/legacy/contrib/NeurIPS_SN7/hrnet_sn7.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# 数据集配置 -DATASET: - DATA_DIR: "/data/SN7_buildings/train/" - NUM_CLASSES: 2 - TEST_FILE_LIST: "test_list.txt" - TRAIN_FILE_LIST: "train_list.txt" - VAL_FILE_LIST: "val_list.txt" -DATALOADER: - NUM_WORKERS: 16 -# 预训练模型配置 -MODEL: - MODEL_NAME: "hrnet" - DEFAULT_NORM_TYPE: "bn" - HRNET: - STAGE2: - NUM_CHANNELS: [48, 96] - STAGE3: - NUM_CHANNELS: [48, 96, 192] - STAGE4: - NUM_CHANNELS: [48, 96, 192, 384] - -# 其他配置 -TRAIN_CROP_SIZE: (512, 512) -EVAL_CROP_SIZE: (512, 512) -AUG: - AUG_METHOD: "rangescaling" - FIX_RESIZE_SIZE: (512, 512) - INF_RESIZE_VALUE: 512 
- MAX_RESIZE_VALUE: 600 - MIN_RESIZE_VALUE: 400 - MIRROR: True -BATCH_SIZE: 16 -TRAIN: - PRETRAINED_MODEL_DIR: "./pretrained_model/hrnet_w48_bn_imagenet/" - MODEL_SAVE_DIR: "/wdata/saved_model/hrnet/" - SNAPSHOT_EPOCH: 1 -TEST: - TEST_MODEL: "/wdata/saved_model/hrnet/best_model/" - TEST_AUG: False - TEST_AUG_FLIP_OPS: ['v'] - TEST_AUG_ROTATE_OPS: [] -SOLVER: - NUM_EPOCHS: 70 - LR: 0.01 - LR_POLICY: "piecewise" - DECAY_EPOCH: [40, 60] - GAMMA: 0.1 - OPTIMIZER: "sgd" -VIS: - ADD_LABEL: False - RAW_PRED: True diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/LICENSE b/legacy/contrib/NeurIPS_SN7/pdseg/LICENSE deleted file mode 100644 index 753842b672..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/__init__.py deleted file mode 100644 index eb2fd412bc..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import models -import utils -from . import tools diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/check.py b/legacy/contrib/NeurIPS_SN7/pdseg/check.py deleted file mode 100644 index 629f7e45d8..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/check.py +++ /dev/null @@ -1,657 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import os -import sys -import pprint -import argparse -import cv2 -from tqdm import tqdm -import imghdr -import logging - -from utils.config import cfg -from reader import pil_imread - - -def init_global_variable(): - """ - 初始化全局变量 - """ - global png_format_right_num # 格式正确的标注图数量 - global png_format_wrong_num # 格式错误的标注图数量 - global total_grt_classes # 总的标注类别 - global total_num_of_each_class # 每个类别总的像素数 - global shape_unequal_image # 图片和标注shape不一致列表 - global png_format_wrong_image # 标注格式错误列表 - global max_width # 图片最长宽 - global max_height # 图片最长高 - global min_aspectratio # 图片最小宽高比 - global max_aspectratio # 图片最大宽高比 - global img_dim # 图片的通道数 - global list_wrong # 文件名格式错误列表 - global imread_failed # 图片读取失败列表, 二元列表 - global label_wrong # 标注图片出错列表 - global label_gray_wrong # 标注图非灰度图列表 - - png_format_right_num = 0 - png_format_wrong_num = 0 - total_grt_classes = [] - total_num_of_each_class = [] - shape_unequal_image = [] - png_format_wrong_image = [] - max_width = 0 - max_height = 0 - min_aspectratio = sys.float_info.max - max_aspectratio = 0 - img_dim = [] - list_wrong = [] - imread_failed = [] - label_wrong = [] - label_gray_wrong = [] - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddleSeg check') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - return parser.parse_args() - - -def error_print(str): - return "".join(["\nNOT PASS ", str]) - - -def correct_print(str): - return "".join(["\nPASS ", str]) - - -def cv2_imread(file_path, flag=cv2.IMREAD_COLOR): - """ - 解决 cv2.imread 在window平台打开中文路径的问题. - """ - return cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), flag) - - -def get_image_max_height_width(img): - """获取图片最大宽和高""" - global max_width, max_height - img_shape = img.shape - height, width = img_shape[0], img_shape[1] - max_height = max(height, max_height) - max_width = max(width, max_width) - - -def get_image_min_max_aspectratio(img): - """计算图片最大宽高比""" - global min_aspectratio, max_aspectratio - img_shape = img.shape - height, width = img_shape[0], img_shape[1] - min_aspectratio = min(width / height, min_aspectratio) - max_aspectratio = max(width / height, max_aspectratio) - return min_aspectratio, max_aspectratio - - -def get_image_dim(img): - """获取图像的通道数""" - img_shape = img.shape - if img_shape[-1] not in img_dim: - img_dim.append(img_shape[-1]) - - -def is_label_gray(grt): - """判断标签是否为灰度图""" - grt_shape = grt.shape - if len(grt_shape) == 2: - return True - else: - return False - - -def image_label_shape_check(img, grt): - """ - 验证图像和标注的大小是否匹配 - """ - - flag = True - img_height = img.shape[0] - img_width = img.shape[1] - grt_height = grt.shape[0] - grt_width = grt.shape[1] - - if img_height != grt_height or img_width != grt_width: - flag = False - return flag - - -def ground_truth_check(grt, grt_path): - """ - 验证标注图像的格式 - 统计标注图类别和像素数 - params: - grt: 标注图 - grt_path: 标注图路径 - return: - png_format: 返回是否是png格式图片 - unique: 返回标注类别 - counts: 返回标注的像素数 - """ - if imghdr.what(grt_path) == "png": - png_format = True - else: - png_format = False - - unique, counts = np.unique(grt, return_counts=True) - - return png_format, unique, counts - - -def sum_gt_check(png_format, grt_classes, num_of_each_class): - """ - 统计所有标注图上的格式、类别和每个类别的像素数 - params: - png_format: 是否是png格式图片 - grt_classes: 标注类别 - num_of_each_class: 各个类别的像素数目 - """ - is_label_correct = 
True - global png_format_right_num, png_format_wrong_num, total_grt_classes, total_num_of_each_class - - if png_format: - png_format_right_num += 1 - else: - png_format_wrong_num += 1 - - if cfg.DATASET.IGNORE_INDEX in grt_classes: - grt_classes2 = np.delete( - grt_classes, np.where(grt_classes == cfg.DATASET.IGNORE_INDEX)) - else: - grt_classes2 = grt_classes - if min(grt_classes2) < 0 or max(grt_classes2) > cfg.DATASET.NUM_CLASSES - 1: - is_label_correct = False - add_class = [] - add_num = [] - for i in range(len(grt_classes)): - gi = grt_classes[i] - if gi in total_grt_classes: - j = total_grt_classes.index(gi) - total_num_of_each_class[j] += num_of_each_class[i] - else: - add_class.append(gi) - add_num.append(num_of_each_class[i]) - total_num_of_each_class += add_num - total_grt_classes += add_class - return is_label_correct - - -def gt_check(): - """ - 对标注图像进行校验,输出校验结果 - """ - if png_format_wrong_num == 0: - if png_format_right_num: - logger.info(correct_print("label format check")) - else: - logger.info(error_print("label format check")) - logger.info("No label image to check") - return - else: - logger.info(error_print("label format check")) - logger.info( - "total {} label images are png format, {} label images are not png " - "format".format(png_format_right_num, png_format_wrong_num)) - if len(png_format_wrong_image) > 0: - for i in png_format_wrong_image: - logger.debug(i) - - total_ratio = total_num_of_each_class / sum(total_num_of_each_class) - total_ratio = np.around(total_ratio, decimals=4) - total_nc = sorted( - zip(total_grt_classes, total_num_of_each_class, total_ratio)) - logger.info( - "\nDoing label pixel statistics:\n" - "(label class, total pixel number, percentage) = {} ".format(total_nc)) - - if len(label_wrong) == 0 and not total_nc[0][0]: - logger.info(correct_print("label class check!")) - else: - logger.info(error_print("label class check!")) - if total_nc[0][0]: - logger.info("Warning: label classes should start from 0") - if len(label_wrong) > 0: - logger.info( - "fatal error: label class is out of range [0, {}]".format( - cfg.DATASET.NUM_CLASSES - 1)) - for i in label_wrong: - logger.debug(i) - - -def eval_crop_size_check(max_height, max_width, min_aspectratio, - max_aspectratio): - """ - 判断eval_crop_siz与验证集及测试集的max_height, max_width的关系 - param - max_height: 数据集的最大高 - max_width: 数据集的最大宽 - """ - - if cfg.AUG.AUG_METHOD == "stepscaling": - if max_width <= cfg.EVAL_CROP_SIZE[ - 0] and max_height <= cfg.EVAL_CROP_SIZE[1]: - logger.info(correct_print("EVAL_CROP_SIZE check")) - logger.info( - "satisfy current EVAL_CROP_SIZE: ({},{}) >= max width and max height of images: ({},{})" - .format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], max_width, - max_height)) - else: - logger.info(error_print("EVAL_CROP_SIZE check")) - if max_width > cfg.EVAL_CROP_SIZE[0]: - logger.info( - "EVAL_CROP_SIZE[0]: {} should >= max width of images {}!". - format(cfg.EVAL_CROP_SIZE[0], max_width)) - if max_height > cfg.EVAL_CROP_SIZE[1]: - logger.info( - "EVAL_CROP_SIZE[1]: {} should >= max height of images {}!". - format(cfg.EVAL_CROP_SIZE[1], max_height)) - - elif cfg.AUG.AUG_METHOD == "rangescaling": - if min_aspectratio <= 1 and max_aspectratio >= 1: - if cfg.EVAL_CROP_SIZE[0] >= cfg.AUG.INF_RESIZE_VALUE \ - and cfg.EVAL_CROP_SIZE[1] >= cfg.AUG.INF_RESIZE_VALUE: - logger.info(correct_print("EVAL_CROP_SIZE check")) - logger.info( - "satisfy current EVAL_CROP_SIZE: ({},{}) >= ({},{}) ". 
- format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], - cfg.AUG.INF_RESIZE_VALUE, cfg.AUG.INF_RESIZE_VALUE)) - else: - logger.info(error_print("EVAL_CROP_SIZE check")) - logger.info( - "EVAL_CROP_SIZE must >= img size({},{}), current EVAL_CROP_SIZE is ({},{})" - .format(cfg.AUG.INF_RESIZE_VALUE, cfg.AUG.INF_RESIZE_VALUE, - cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1])) - elif min_aspectratio > 1: - max_height_rangscaling = cfg.AUG.INF_RESIZE_VALUE / min_aspectratio - max_height_rangscaling = round(max_height_rangscaling) - if cfg.EVAL_CROP_SIZE[ - 0] >= cfg.AUG.INF_RESIZE_VALUE and cfg.EVAL_CROP_SIZE[ - 1] >= max_height_rangscaling: - logger.info(correct_print("EVAL_CROP_SIZE check")) - logger.info( - "satisfy current EVAL_CROP_SIZE: ({},{}) >= ({},{}) ". - format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], - cfg.AUG.INF_RESIZE_VALUE, max_height_rangscaling)) - else: - logger.info(error_print("EVAL_CROP_SIZE check")) - logger.info( - "EVAL_CROP_SIZE must >= img size({},{}), current EVAL_CROP_SIZE is ({},{})" - .format(cfg.AUG.INF_RESIZE_VALUE, max_height_rangscaling, - cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1])) - elif max_aspectratio < 1: - max_width_rangscaling = cfg.AUG.INF_RESIZE_VALUE * max_aspectratio - max_width_rangscaling = round(max_width_rangscaling) - if cfg.EVAL_CROP_SIZE[ - 0] >= max_width_rangscaling and cfg.EVAL_CROP_SIZE[ - 1] >= cfg.AUG.INF_RESIZE_VALUE: - logger.info(correct_print("EVAL_CROP_SIZE check")) - logger.info( - "satisfy current EVAL_CROP_SIZE: ({},{}) >= ({},{}) ". - format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], - max_height_rangscaling, cfg.AUG.INF_RESIZE_VALUE)) - else: - logger.info(error_print("EVAL_CROP_SIZE check")) - logger.info( - "EVAL_CROP_SIZE must >= img size({},{}), current EVAL_CROP_SIZE is ({},{})" - .format(max_width_rangscaling, cfg.AUG.INF_RESIZE_VALUE, - cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1])) - elif cfg.AUG.AUG_METHOD == "unpadding": - if len(cfg.AUG.FIX_RESIZE_SIZE) != 2: - logger.info(error_print("EVAL_CROP_SIZE check")) - logger.info( - "you set AUG.AUG_METHOD = 'unpadding', but AUG.FIX_RESIZE_SIZE is wrong. " - "AUG.FIX_RESIZE_SIZE should be a tuple of length 2") - elif cfg.EVAL_CROP_SIZE[0] >= cfg.AUG.FIX_RESIZE_SIZE[0] \ - and cfg.EVAL_CROP_SIZE[1] >= cfg.AUG.FIX_RESIZE_SIZE[1]: - logger.info(correct_print("EVAL_CROP_SIZE check")) - logger.info( - "satisfy current EVAL_CROP_SIZE: ({},{}) >= AUG.FIX_RESIZE_SIZE: ({},{}) " - .format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], - cfg.AUG.FIX_RESIZE_SIZE[0], cfg.AUG.FIX_RESIZE_SIZE[1])) - else: - logger.info(error_print("EVAL_CROP_SIZE check")) - logger.info( - "EVAL_CROP_SIZE: ({},{}) must >= AUG.FIX_RESIZE_SIZE: ({},{})". - format(cfg.EVAL_CROP_SIZE[0], cfg.EVAL_CROP_SIZE[1], - cfg.AUG.FIX_RESIZE_SIZE[0], cfg.AUG.FIX_RESIZE_SIZE[1])) - else: - logger.info( - "\nERROR! cfg.AUG.AUG_METHOD setting wrong, it should be one of " - "[unpadding, stepscaling, rangescaling]") - - -def inf_resize_value_check(): - if cfg.AUG.AUG_METHOD == "rangescaling": - if cfg.AUG.INF_RESIZE_VALUE < cfg.AUG.MIN_RESIZE_VALUE or \ - cfg.AUG.INF_RESIZE_VALUE > cfg.AUG.MIN_RESIZE_VALUE: - logger.info( - "\nWARNING! 
you set AUG.AUG_METHOD = 'rangescaling'" - "AUG.INF_RESIZE_VALUE: {} not in [AUG.MIN_RESIZE_VALUE, AUG.MAX_RESIZE_VALUE]: " - "[{}, {}].".format(cfg.AUG.INF_RESIZE_VALUE, - cfg.AUG.MIN_RESIZE_VALUE, - cfg.AUG.MAX_RESIZE_VALUE)) - - -def image_type_check(img_dim): - """ - 验证图像的格式与DATASET.IMAGE_TYPE是否一致 - param - img_dim: 图像包含的通道数 - return - """ - if (1 in img_dim or 3 in img_dim) and cfg.DATASET.IMAGE_TYPE == 'rgba': - logger.info(error_print("DATASET.IMAGE_TYPE check")) - logger.info("DATASET.IMAGE_TYPE is {} but the type of image has " - "gray or rgb\n".format(cfg.DATASET.IMAGE_TYPE)) - elif (1 not in img_dim and 3 not in img_dim - and 4 in img_dim) and cfg.DATASET.IMAGE_TYPE == 'rgb': - logger.info(correct_print("DATASET.IMAGE_TYPE check")) - logger.info( - "\nWARNING: DATASET.IMAGE_TYPE is {} but the type of all image is rgba" - .format(cfg.DATASET.IMAGE_TYPE)) - else: - logger.info(correct_print("DATASET.IMAGE_TYPE check")) - - -def shape_check(): - """输出shape校验结果""" - if len(shape_unequal_image) == 0: - logger.info(correct_print("shape check")) - logger.info("All images are the same shape as the labels") - else: - logger.info(error_print("shape check")) - logger.info( - "Some images are not the same shape as the labels as follow: ") - for i in shape_unequal_image: - logger.debug(i) - - -def file_list_check(list_name): - """检查分割符是否复合要求""" - if len(list_wrong) == 0: - logger.info( - correct_print( - list_name.split(os.sep)[-1] + " DATASET.SEPARATOR check")) - else: - logger.info( - error_print( - list_name.split(os.sep)[-1] + " DATASET.SEPARATOR check")) - logger.info("The following list is not separated by {}".format( - cfg.DATASET.SEPARATOR)) - for i in list_wrong: - logger.debug(i) - - -def imread_check(): - if len(imread_failed) == 0: - logger.info(correct_print("dataset reading check")) - logger.info("All images can be read successfully") - else: - logger.info(error_print("dataset reading check")) - logger.info("Failed to read {} images".format(len(imread_failed))) - for i in imread_failed: - logger.debug(i) - - -def label_gray_check(): - if len(label_gray_wrong) == 0: - logger.info(correct_print("label gray check")) - logger.info("All label images are gray") - else: - logger.info(error_print("label gray check")) - logger.info( - "{} label images are not gray\nLabel pixel statistics may be insignificant" - .format(len(label_gray_wrong))) - for i in label_gray_wrong: - logger.debug(i) - - -def max_img_size_statistics(): - logger.info("\nDoing max image size statistics:") - logger.info("max width and max height of images are ({},{})".format( - max_width, max_height)) - - -def num_classes_loss_matching_check(): - loss_type = cfg.SOLVER.LOSS - num_classes = cfg.DATASET.NUM_CLASSES - if num_classes > 2 and (("dice_loss" in loss_type) or - ("bce_loss" in loss_type)): - logger.info( - error_print( - "loss check." - " Dice loss and bce loss is only applicable to binary classfication" - )) - else: - logger.info(correct_print("loss check")) - - -def check_train_dataset(): - list_file = cfg.DATASET.TRAIN_FILE_LIST - logger.info("-----------------------------\n1. 
Check train dataset...") - with open(list_file, 'r') as fid: - lines = fid.readlines() - for line in tqdm(lines): - line = line.strip() - parts = line.split(cfg.DATASET.SEPARATOR) - if len(parts) != 2: - list_wrong.append(line) - continue - img_name, grt_name = parts[0], parts[1] - img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name) - grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name) - try: - img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED) - grt = pil_imread(grt_path) - except Exception as e: - imread_failed.append((line, str(e))) - continue - - is_gray = is_label_gray(grt) - if not is_gray: - label_gray_wrong.append(line) - grt = cv2.cvtColor(grt, cv2.COLOR_BGR2GRAY) - get_image_max_height_width(img) - get_image_dim(img) - is_equal_img_grt_shape = image_label_shape_check(img, grt) - if not is_equal_img_grt_shape: - shape_unequal_image.append(line) - - png_format, grt_classes, num_of_each_class = ground_truth_check( - grt, grt_path) - if not png_format: - png_format_wrong_image.append(line) - is_label_correct = sum_gt_check(png_format, grt_classes, - num_of_each_class) - if not is_label_correct: - label_wrong.append(line) - - file_list_check(list_file) - imread_check() - label_gray_check() - gt_check() - image_type_check(img_dim) - max_img_size_statistics() - shape_check() - num_classes_loss_matching_check() - - -def check_val_dataset(): - list_file = cfg.DATASET.VAL_FILE_LIST - logger.info("\n-----------------------------\n2. Check val dataset...") - with open(list_file) as fid: - lines = fid.readlines() - for line in tqdm(lines): - line = line.strip() - parts = line.split(cfg.DATASET.SEPARATOR) - if len(parts) != 2: - list_wrong.append(line) - continue - img_name, grt_name = parts[0], parts[1] - img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name) - grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name) - try: - img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED) - grt = pil_imread(grt_path) - except Exception as e: - imread_failed.append((line, str(e))) - continue - - is_gray = is_label_gray(grt) - if not is_gray: - label_gray_wrong.append(line) - grt = cv2.cvtColor(grt, cv2.COLOR_BGR2GRAY) - get_image_max_height_width(img) - get_image_min_max_aspectratio(img) - get_image_dim(img) - is_equal_img_grt_shape = image_label_shape_check(img, grt) - if not is_equal_img_grt_shape: - shape_unequal_image.append(line) - png_format, grt_classes, num_of_each_class = ground_truth_check( - grt, grt_path) - if not png_format: - png_format_wrong_image.append(line) - is_label_correct = sum_gt_check(png_format, grt_classes, - num_of_each_class) - if not is_label_correct: - label_wrong.append(line) - - file_list_check(list_file) - imread_check() - label_gray_check() - gt_check() - image_type_check(img_dim) - max_img_size_statistics() - shape_check() - eval_crop_size_check(max_height, max_width, min_aspectratio, - max_aspectratio) - - -def check_test_dataset(): - list_file = cfg.DATASET.TEST_FILE_LIST - has_label = False - with open(list_file) as fid: - logger.info("\n-----------------------------\n3. 
Check test dataset...") - lines = fid.readlines() - for line in tqdm(lines): - line = line.strip() - parts = line.split(cfg.DATASET.SEPARATOR) - if len(parts) == 1: - img_name = parts - img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name[0]) - try: - img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED) - except Exception as e: - imread_failed.append((line, str(e))) - continue - elif len(parts) == 2: - has_label = True - img_name, grt_name = parts[0], parts[1] - img_path = os.path.join(cfg.DATASET.DATA_DIR, img_name) - grt_path = os.path.join(cfg.DATASET.DATA_DIR, grt_name) - try: - img = cv2_imread(img_path, cv2.IMREAD_UNCHANGED) - grt = pil_imread(grt_path) - except Exception as e: - imread_failed.append((line, str(e))) - continue - - is_gray = is_label_gray(grt) - if not is_gray: - label_gray_wrong.append(line) - grt = cv2.cvtColor(grt, cv2.COLOR_BGR2GRAY) - is_equal_img_grt_shape = image_label_shape_check(img, grt) - if not is_equal_img_grt_shape: - shape_unequal_image.append(line) - png_format, grt_classes, num_of_each_class = ground_truth_check( - grt, grt_path) - if not png_format: - png_format_wrong_image.append(line) - is_label_correct = sum_gt_check(png_format, grt_classes, - num_of_each_class) - if not is_label_correct: - label_wrong.append(line) - else: - list_wrong.append(lines) - continue - get_image_max_height_width(img) - get_image_min_max_aspectratio(img) - get_image_dim(img) - - file_list_check(list_file) - imread_check() - if has_label: - label_gray_check() - if has_label: - gt_check() - image_type_check(img_dim) - max_img_size_statistics() - if has_label: - shape_check() - eval_crop_size_check(max_height, max_width, min_aspectratio, - max_aspectratio) - - -def main(args): - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - cfg.check_and_infer() - logger.info(pprint.pformat(cfg)) - - init_global_variable() - check_train_dataset() - - init_global_variable() - check_val_dataset() - - init_global_variable() - check_test_dataset() - - inf_resize_value_check() - - print("\nDetailed error information can be viewed in detail.log file.") - - -if __name__ == "__main__": - args = parse_args() - logger = logging.getLogger() - logger.setLevel('DEBUG') - BASIC_FORMAT = "%(message)s" - formatter = logging.Formatter(BASIC_FORMAT) - sh = logging.StreamHandler() - sh.setFormatter(formatter) - sh.setLevel('INFO') - th = logging.FileHandler('detail.log', 'w') - th.setFormatter(formatter) - logger.addHandler(sh) - logger.addHandler(th) - main(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/data_aug.py b/legacy/contrib/NeurIPS_SN7/pdseg/data_aug.py deleted file mode 100644 index f4083ede2d..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/data_aug.py +++ /dev/null @@ -1,526 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function -import cv2 -import numpy as np -from utils.config import cfg -from models.model_builder import ModelPhase - - -def resize(img1, img2=None, grt1=None, grt2=None, mode=ModelPhase.TRAIN): - """ - 改变图像及标签图像尺寸 - AUG.AUG_METHOD为unpadding,所有模式均直接resize到AUG.FIX_RESIZE_SIZE的尺寸 - AUG.AUG_METHOD为stepscaling, 按比例resize,训练时比例范围AUG.MIN_SCALE_FACTOR到AUG.MAX_SCALE_FACTOR,间隔为AUG.SCALE_STEP_SIZE,其他模式返回原图 - AUG.AUG_METHOD为rangescaling,长边对齐,短边按比例变化,训练时长边对齐范围AUG.MIN_RESIZE_VALUE到AUG.MAX_RESIZE_VALUE,其他模式长边对齐AUG.INF_RESIZE_VALUE - - Args: - img(numpy.ndarray): 输入图像 - grt(numpy.ndarray): 标签图像,默认为None - mode(string): 模式, 默认训练模式,即ModelPhase.TRAIN - - Returns: - resize后的图像和标签图 - - """ - if cfg.AUG.AUG_METHOD == 'unpadding': - target_size = cfg.AUG.FIX_RESIZE_SIZE - img1 = cv2.resize(img1, target_size, interpolation=cv2.INTER_LINEAR) - if img2 is not None: - img2 = cv2.resize(img2, target_size, interpolation=cv2.INTER_LINEAR) - if grt1 is not None: - grt1 = cv2.resize( - grt1, target_size, interpolation=cv2.INTER_NEAREST) - if grt2 is not None: - grt2 = cv2.resize( - grt2, target_size, interpolation=cv2.INTER_NEAREST) - elif cfg.AUG.AUG_METHOD == 'stepscaling': - if mode == ModelPhase.TRAIN: - min_scale_factor = cfg.AUG.MIN_SCALE_FACTOR - max_scale_factor = cfg.AUG.MAX_SCALE_FACTOR - step_size = cfg.AUG.SCALE_STEP_SIZE - scale_factor = get_random_scale(min_scale_factor, max_scale_factor, - step_size) - img1, img2, grt1, grt2 = randomly_scale_image_and_label( - img1, img2, grt1, grt2, scale=scale_factor) - elif cfg.AUG.AUG_METHOD == 'rangescaling': - min_resize_value = cfg.AUG.MIN_RESIZE_VALUE - max_resize_value = cfg.AUG.MAX_RESIZE_VALUE - if mode == ModelPhase.TRAIN: - if min_resize_value == max_resize_value: - random_size = min_resize_value - else: - random_size = int( - np.random.uniform(min_resize_value, max_resize_value) + 0.5) - else: - random_size = cfg.AUG.INF_RESIZE_VALUE - - value = max(img1.shape[0], img1.shape[1]) - scale = float(random_size) / float(value) - img1 = cv2.resize( - img1, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) - if img2 is not None: - img2 = cv2.resize( - img2, (0, 0), - fx=scale, - fy=scale, - interpolation=cv2.INTER_LINEAR) - if grt1 is not None: - grt1 = cv2.resize( - grt1, (0, 0), - fx=scale, - fy=scale, - interpolation=cv2.INTER_NEAREST) - if grt2 is not None: - grt2 = cv2.resize( - grt2, (0, 0), - fx=scale, - fy=scale, - interpolation=cv2.INTER_NEAREST) - else: - raise Exception("Unexpect data augmention method: {}".format( - cfg.AUG.AUG_METHOD)) - - return img1, img2, grt1, grt2 - - -def get_random_scale(min_scale_factor, max_scale_factor, step_size): - """ - 在一定范围内得到随机值,范围为min_scale_factor到max_scale_factor,间隔为step_size - - Args: - min_scale_factor(float): 随机尺度下限,大于0 - max_scale_factor(float): 随机尺度上限,不小于下限值 - step_size(float): 尺度间隔,非负, 等于为0时直接返回min_scale_factor到max_scale_factor范围内任一值 - - Returns: - 随机尺度值 - - """ - - if min_scale_factor < 0 or min_scale_factor > max_scale_factor: - raise ValueError('Unexpected value of min_scale_factor.') - - if min_scale_factor == max_scale_factor: - return min_scale_factor - - if step_size == 0: - return np.random.uniform(min_scale_factor, max_scale_factor) - - num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1) - scale_factors = np.linspace(min_scale_factor, max_scale_factor, - num_steps).tolist() - np.random.shuffle(scale_factors) - return scale_factors[0] - - -def randomly_scale_image_and_label(img1, - img2=None, - grt1=None, - grt2=None, - scale=1.0): - """ - 
按比例resize图像和标签图, 如果scale为1,返回原图 - - Args: - image(numpy.ndarray): 输入图像 - label(numpy.ndarray): 标签图,默认None - sclae(float): 图片resize的比例,非负,默认1.0 - - Returns: - resize后的图像和标签图 - - """ - - if scale == 1.0: - return img1, img2, grt1, grt2 - - height = img1.shape[0] - width = img1.shape[1] - new_height = int(height * scale + 0.5) - new_width = int(width * scale + 0.5) - - img1 = cv2.resize( - img1, (new_width, new_height), interpolation=cv2.INTER_LINEAR) - if img2 is not None: - img2 = cv2.resize( - img2, (new_width, new_height), interpolation=cv2.INTER_LINEAR) - if grt1 is not None: - grt1 = cv2.resize( - grt1, (new_width, new_height), interpolation=cv2.INTER_NEAREST) - if grt2 is not None: - grt2 = cv2.resize( - grt2, (new_width, new_height), interpolation=cv2.INTER_NEAREST) - - return img1, img2, grt1, grt2 - - -def random_rotation(crop_img1, crop_img2, crop_seg1, crop_seg2, - rich_crop_max_rotation, mean_value): - """ - 随机旋转图像和标签图 - - Args: - crop_img(numpy.ndarray): 输入图像 - crop_seg(numpy.ndarray): 标签图 - rich_crop_max_rotation(int):旋转最大角度,0-90 - mean_value(list):均值, 对图片旋转产生的多余区域使用均值填充 - - Returns: - 旋转后的图像和标签图 - - """ - ignore_index = cfg.DATASET.IGNORE_INDEX - if rich_crop_max_rotation > 0: - (h, w) = crop_img1.shape[:2] - do_rotation = np.random.uniform(-rich_crop_max_rotation, - rich_crop_max_rotation) - pc = (w // 2, h // 2) - r = cv2.getRotationMatrix2D(pc, do_rotation, 1.0) - cos = np.abs(r[0, 0]) - sin = np.abs(r[0, 1]) - - nw = int((h * sin) + (w * cos)) - nh = int((h * cos) + (w * sin)) - - (cx, cy) = pc - r[0, 2] += (nw / 2) - cx - r[1, 2] += (nh / 2) - cy - dsize = (nw, nh) - - crop_img1 = cv2.warpAffine( - crop_img1, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=mean_value) - if crop_img2 is not None: - crop_img2 = cv2.warpAffine( - crop_img2, - r, - dsize=dsize, - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=mean_value) - - crop_seg1 = cv2.warpAffine( - crop_seg1, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(ignore_index, ignore_index, ignore_index)) - if crop_seg2 is not None: - crop_seg2 = cv2.warpAffine( - crop_seg2, - r, - dsize=dsize, - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(ignore_index, ignore_index, ignore_index)) - return crop_img1, crop_img2, crop_seg1, crop_seg2 - - -def rand_scale_aspect(crop_img1, - crop_img2, - crop_seg1, - crop_seg2, - rich_crop_min_scale=0, - rich_crop_aspect_ratio=0): - """ - 从输入图像和标签图像中裁取随机宽高比的图像,并reszie回原始尺寸 - - Args: - crop_img(numpy.ndarray): 输入图像 - crop_seg(numpy.ndarray): 标签图像 - rich_crop_min_scale(float):裁取图像占原始图像的面积比,0-1,默认0返回原图 - rich_crop_aspect_ratio(float): 裁取图像的宽高比范围,非负,默认0返回原图 - - Returns: - 裁剪并resize回原始尺寸的图像和标签图像 - - """ - if rich_crop_min_scale == 0 or rich_crop_aspect_ratio == 0: - return crop_img1, crop_img2, crop_seg1, crop_seg2 - else: - img_height = crop_img1.shape[0] - img_width = crop_img1.shape[1] - for i in range(0, 10): - area = img_height * img_width - target_area = area * np.random.uniform(rich_crop_min_scale, 1.0) - aspectRatio = np.random.uniform(rich_crop_aspect_ratio, - 1.0 / rich_crop_aspect_ratio) - - dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) - dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) - if np.random.randint(10) < 5: - tmp = dw - dw = dh - dh = tmp - - if dh < img_height and dw < img_width: - h1 = np.random.randint(0, img_height - dh) - w1 = np.random.randint(0, img_width - dw) - - crop_img1 = crop_img1[h1:(h1 + dh), w1:(w1 + dw), :] - 
if crop_img2 is not None: - crop_img2 = crop_img2[h1:(h1 + dh), w1:(w1 + dw), :] - crop_seg1 = crop_seg1[h1:(h1 + dh), w1:(w1 + dw)] - if crop_seg2 is not None: - crop_seg2 = crop_seg2[h1:(h1 + dh), w1:(w1 + dw)] - - crop_img1 = cv2.resize( - crop_img1, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - if crop_img2 is not None: - crop_img2 = cv2.resize( - crop_img2, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - crop_seg1 = cv2.resize( - crop_seg1, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) - if crop_seg2 is not None: - crop_seg2 = cv2.resize( - crop_seg2, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) - - break - - return crop_img1, crop_img2, crop_seg1, crop_seg2 - - -def saturation_jitter(cv_img, jitter_range): - """ - 调节图像饱和度 - - Args: - cv_img(numpy.ndarray): 输入图像 - jitter_range(float): 调节程度,0-1 - - Returns: - 饱和度调整后的图像 - - """ - - greyMat = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY) - greyMat = greyMat[:, :, None] * np.ones(3, dtype=int)[None, None, :] - cv_img = cv_img.astype(np.float32) - cv_img = cv_img * (1 - jitter_range) + jitter_range * greyMat - cv_img = np.where(cv_img > 255, 255, cv_img) - cv_img = cv_img.astype(np.uint8) - return cv_img - - -def brightness_jitter(cv_img, jitter_range): - """ - 调节图像亮度 - - Args: - cv_img(numpy.ndarray): 输入图像 - jitter_range(float): 调节程度,0-1 - - Returns: - 亮度调整后的图像 - - """ - - cv_img = cv_img.astype(np.float32) - cv_img = cv_img * (1.0 - jitter_range) - cv_img = np.where(cv_img > 255, 255, cv_img) - cv_img = cv_img.astype(np.uint8) - return cv_img - - -def contrast_jitter(cv_img, jitter_range): - """ - 调节图像对比度 - - Args: - cv_img(numpy.ndarray): 输入图像 - jitter_range(float): 调节程度,0-1 - - Returns: - 对比度调整后的图像 - - """ - - greyMat = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY) - mean = np.mean(greyMat) - cv_img = cv_img.astype(np.float32) - cv_img = cv_img * (1 - jitter_range) + jitter_range * mean - cv_img = np.where(cv_img > 255, 255, cv_img) - cv_img = cv_img.astype(np.uint8) - return cv_img - - -def random_jitter(cv_img, saturation_range, brightness_range, contrast_range): - """ - 图像亮度、饱和度、对比度调节,在调整范围内随机获得调节比例,并随机顺序叠加三种效果 - - Args: - cv_img(numpy.ndarray): 输入图像 - saturation_range(float): 饱和对调节范围,0-1 - brightness_range(float): 亮度调节范围,0-1 - contrast_range(float): 对比度调节范围,0-1 - - Returns: - 亮度、饱和度、对比度调整后图像 - - """ - - saturation_ratio = np.random.uniform(-saturation_range, saturation_range) - brightness_ratio = np.random.uniform(-brightness_range, brightness_range) - contrast_ratio = np.random.uniform(-contrast_range, contrast_range) - - order = [0, 1, 2] - np.random.shuffle(order) - - for i in range(3): - if order[i] == 0: - cv_img = saturation_jitter(cv_img, saturation_ratio) - if order[i] == 1: - cv_img = brightness_jitter(cv_img, brightness_ratio) - if order[i] == 2: - cv_img = contrast_jitter(cv_img, contrast_ratio) - return cv_img - - -def hsv_color_jitter(crop_img1, - crop_img2, - brightness_jitter_ratio=0, - saturation_jitter_ratio=0, - contrast_jitter_ratio=0): - """ - 图像亮度、饱和度、对比度调节 - - Args: - crop_img(numpy.ndarray): 输入图像 - brightness_jitter_ratio(float): 亮度调节度最大值,1-0,默认0 - saturation_jitter_ratio(float): 饱和度调节度最大值,1-0,默认0 - contrast_jitter_ratio(float): 对比度调节度最大值,1-0,默认0 - - Returns: - 亮度、饱和度、对比度调节后图像 - - """ - - if brightness_jitter_ratio > 0 or \ - saturation_jitter_ratio > 0 or \ - contrast_jitter_ratio > 0: - crop_img1 = random_jitter(crop_img1, saturation_jitter_ratio, - brightness_jitter_ratio, - contrast_jitter_ratio) - if crop_img2 is not None: - crop_img2 = 
random_jitter(crop_img2, saturation_jitter_ratio, - brightness_jitter_ratio, - contrast_jitter_ratio) - return crop_img1, crop_img2 - - -def rand_crop(crop_img1, crop_img2, crop_seg1, crop_seg2, - mode=ModelPhase.TRAIN): - """ - 随机裁剪图片和标签图, 若crop尺寸大于原始尺寸,分别使用DATASET.PADDING_VALUE值和DATASET.IGNORE_INDEX值填充再进行crop, - crop尺寸与原始尺寸一致,返回原图,crop尺寸小于原始尺寸直接crop - - Args: - crop_img(numpy.ndarray): 输入图像 - crop_seg(numpy.ndarray): 标签图 - mode(string): 模式, 默认训练模式,验证或预测、可视化模式时crop尺寸需大于原始图片尺寸 - - Returns: - 裁剪后的图片和标签图 - - """ - - img_height = crop_img1.shape[0] - img_width = crop_img1.shape[1] - - if ModelPhase.is_train(mode): - crop_width = cfg.TRAIN_CROP_SIZE[0] - crop_height = cfg.TRAIN_CROP_SIZE[1] - else: - crop_width = cfg.EVAL_CROP_SIZE[0] - crop_height = cfg.EVAL_CROP_SIZE[1] - - if not ModelPhase.is_train(mode): - if crop_height < img_height or crop_width < img_width: - raise Exception( - "Crop size({},{}) must large than img size({},{}) when in EvalPhase." - .format(crop_width, crop_height, img_width, img_height)) - - if img_height == crop_height and img_width == crop_width: - return crop_img1, crop_img2, crop_seg1, crop_seg2 - else: - pad_height = max(crop_height - img_height, 0) - pad_width = max(crop_width - img_width, 0) - if pad_height > 0 or pad_width > 0: - crop_img1 = cv2.copyMakeBorder( - crop_img1, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=cfg.DATASET.PADDING_VALUE) - if crop_img2 is not None: - crop_img2 = cv2.copyMakeBorder( - crop_img2, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=cfg.DATASET.PADDING_VALUE) - if crop_seg1 is not None: - crop_seg1 = cv2.copyMakeBorder( - crop_seg1, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=cfg.DATASET.IGNORE_INDEX) - if crop_seg2 is not None: - crop_seg2 = cv2.copyMakeBorder( - crop_seg2, - 0, - pad_height, - 0, - pad_width, - cv2.BORDER_CONSTANT, - value=cfg.DATASET.IGNORE_INDEX) - - img_height = crop_img1.shape[0] - img_width = crop_img1.shape[1] - - if crop_height > 0 and crop_width > 0: - h_off = np.random.randint(img_height - crop_height + 1) - w_off = np.random.randint(img_width - crop_width + 1) - - crop_img1 = crop_img1[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] - if crop_img2 is not None: - crop_img2 = crop_img2[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] - if crop_seg1 is not None: - crop_seg1 = crop_seg1[h_off:(crop_height + h_off), w_off:( - w_off + crop_width)] - if crop_seg2 is not None: - crop_seg2 = crop_seg2[h_off:(crop_height + h_off), w_off:( - w_off + crop_width)] - return crop_img1, crop_img2, crop_seg1, crop_seg2 diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/data_utils.py b/legacy/contrib/NeurIPS_SN7/pdseg/data_utils.py deleted file mode 100644 index a78e2e6ddf..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/data_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" -This code is based on https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py -""" - -import time -import numpy as np -import threading -import multiprocessing -try: - import queue -except ImportError: - import Queue as queue - - -class GeneratorEnqueuer(object): - """ - Multiple generators - - Args: - generators: - wait_time (float): time to sleep in-between calls to `put()`. - """ - - def __init__(self, generators, wait_time=0.05): - self.wait_time = wait_time - self._generators = generators - self._threads = [] - self._stop_events = [] - self.queue = None - self._manager = None - self.workers = 1 - - def start(self, workers=1, max_queue_size=16): - """ - Start worker threads which add data from the generator into the queue. - - Args: - workers (int): number of worker threads - max_queue_size (int): queue size - (when full, threads could block on `put()`) - """ - - self.workers = workers - - def data_generator_task(pid): - """ - Data generator task. - """ - - def task(pid): - if (self.queue is not None - and self.queue.qsize() < max_queue_size): - generator_output = next(self._generators[pid]) - self.queue.put((generator_output)) - else: - time.sleep(self.wait_time) - - while not self._stop_events[pid].is_set(): - try: - task(pid) - except Exception: - self._stop_events[pid].set() - break - - try: - self._manager = multiprocessing.Manager() - self.queue = self._manager.Queue(maxsize=max_queue_size) - for pid in range(self.workers): - self._stop_events.append(multiprocessing.Event()) - thread = multiprocessing.Process( - target=data_generator_task, args=(pid, )) - thread.daemon = True - self._threads.append(thread) - thread.start() - except: - self.stop() - raise - - def is_running(self): - """ - Returns: - bool: Whether the worker theads are running. - """ - - # If queue is not empty then still in runing state wait for consumer - if not self.queue.empty(): - return True - - for pid in range(self.workers): - if not self._stop_events[pid].is_set(): - return True - - return False - - def stop(self, timeout=None): - """ - Stops running threads and wait for them to exit, if necessary. - Should be called by the same thread which called `start()`. - - Args: - timeout(int|None): maximum time to wait on `thread.join()`. 
- """ - if self.is_running(): - for pid in range(self.workers): - self._stop_events[pid].set() - - for thread in self._threads: - if thread.is_alive(): - thread.join(timeout) - if self._manager: - self._manager.shutdown() - - self._threads = [] - self._stop_events = [] - self.queue = None diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/demo_clinet.py b/legacy/contrib/NeurIPS_SN7/pdseg/demo_clinet.py deleted file mode 100644 index b696758054..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/demo_clinet.py +++ /dev/null @@ -1,201 +0,0 @@ -import sys -import cv2 -from PIL import Image -import numpy as np -import math -import requests -import multiprocessing -import json -import os -import base64 -import cv2 - -# ###### common configs ###### - -# pre resize -pre_height = None -pre_width = None -# final output size -target_height = 1250 -target_width = 1250 -# stride -height_stride = 1250 -width_stride = 1250 -# padding, always the same as ignore pixel -padding_pixel = 0 -# url -url = "http://10.255.94.19:8000/put_image" - -# ########################### - - -def get_color_map_list(num_classes): - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j, lab = 0, i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - return color_map - - -def compose_img(im_list, rows, cols, save_file): - image = Image.new('P', - (cols * target_width, rows * target_height)) # 创建一个新图 - for y in range(rows): - for x in range(cols): - patch = Image.fromarray(im_list[cols * y + x]) - image.paste(patch, (x * target_width, y * target_height)) - - color_map = get_color_map_list(256) - image.putpalette(color_map) - image.save(save_file) - - -def divide_img(img): - src_im_height = img.shape[0] - src_im_width = img.shape[1] - - ret_imgs = [] - - x1, y1, idx = 0, 0, 0 - while y1 < src_im_height: - y2 = y1 + target_height - while x1 < src_im_width: - x2 = x1 + target_width - img_crop = img[y1:y2, x1:x2] - if y2 > src_im_height or x2 > src_im_width: - pad_bottom = y2 - src_im_height if y2 > src_im_height else 0 - pad_right = x2 - src_im_width if x2 > src_im_width else 0 - img_crop = cv2.copyMakeBorder( - img_crop, - 0, - pad_bottom, - 0, - pad_right, - cv2.BORDER_CONSTANT, - value=padding_pixel) - ret_imgs.append(img_crop) - x1 += width_stride - idx += 1 - x1 = 0 - y1 += height_stride - - return ret_imgs - - -def encode(img): - img_str = base64.b64encode(cv2.imencode('.png', img)[1]).decode() - return img_str - - -def decode(img_str, color=True): - img_byte = base64.b64decode(img_str) - img_np_arr = np.fromstring(img_byte, np.uint8) - if color: - img = cv2.imdecode(img_np_arr, cv2.IMREAD_COLOR) - else: - img = cv2.imdecode(img_np_arr, cv2.IMREAD_GRAYSCALE) - return img - - -def send(img1, img2): - if img2 is not None: - data = { - 'img1': encode(img1), - 'img2': encode(img2), - } - else: - data = {'img1': encode(img1)} - return requests.post(url, data=json.dumps(data)).content - - -def send_request(img1, img2, idx): #, res1_dict, res2_dict, diff_dict): - ret = json.loads(send(img1, img2)) - res1 = decode(ret['res_map1'], color=False) - res1_dict[idx] = res1 - if img2 is not None: - res2 = decode(ret['res_map2'], color=False) - diff = decode(ret['diff'], color=False) - res2_dict[idx] = res2 - diff_dict[idx] = diff - - -def divide_and_infer(img1, img2, save_dir): - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - src_im_height = img1.shape[0] - 
src_im_width = img1.shape[1] - - cols = math.ceil(src_im_width / target_width) - rows = math.ceil(src_im_height / target_height) - nums = cols * rows - - patch1 = divide_img(img1) - patch2 = divide_img(img2) if img2 is not None else [None] * nums - print("divide into %d patch" % nums) - - global res1_dict, res2_dict, diff_dict - res1_dict, res2_dict, diff_dict = {}, {}, {} - for i in range(nums): - send_request(patch1[i], patch2[i], i) - - res1_list = [res1_dict[i] for i in range(nums)] - compose_img(res1_list, rows, cols, save_dir + "/res1.png") - if img2 is not None: - res2_list = [res2_dict[i] for i in range(nums)] - compose_img(res2_list, rows, cols, save_dir + "/res2.png") - diff_list = [diff_dict[i] for i in range(nums)] - compose_img(diff_list, rows, cols, save_dir + "/diff.png") - - # # multiprocess - # res1_dict = multiprocessing.Manager() - # res2_dict = multiprocessing.Manager() - # diff_dict = multiprocessing.Manager() - # - # p_num = 4 - # pool = multiprocessing.Pool(p_num) - # pool.map(send_request, - # (patch1, patch2, list(range(nums)), res1_dict, res2_dict, diff_dict)) - # - # res1_dict = dict(res1_dict) - # res1_list = [res1_dict[i] for i in range(nums)] - # compose_img(res1_list, rows, cols, save_dir + "/res1.png") - # - # res2_dict = dict(res2_dict) - # res2_list = [res2_dict[i] for i in range(nums)] - # compose_img(res2_list, rows, cols, save_dir + "/res2.png") - # - # diff_dict = dict(diff_dict) - # diff_list = [diff_dict[i] for i in range(nums)] - # compose_img(diff_list, rows, cols, save_dir + "/diff.png") - - -def main(im1_file, im2_file, save_dir): - img1 = cv2.imdecode(np.fromfile(im1_file, dtype=np.uint8), cv2.IMREAD_COLOR) - img2 = None - if im2_file is not None: - img2 = cv2.imdecode( - np.fromfile(im2_file, dtype=np.uint8), cv2.IMREAD_COLOR) - - if pre_height and pre_width: - img1 = cv2.resize(img1, (pre_height, pre_width)) - if img2 is not None: - img2 = cv2.resize(img2, (pre_height, pre_width)) - divide_and_infer(img1, img2, save_dir) - - -if __name__ == '__main__': - args = sys.argv - if len(args) == 2: - im1_file = args[1] - im2_file = None - else: - im1_file = args[1] - im2_file = args[2] - - main(im1_file, im2_file, save_dir="result") diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/demo_server.py b/legacy/contrib/NeurIPS_SN7/pdseg/demo_server.py deleted file mode 100644 index 9174823625..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/demo_server.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import json -from flask import Flask -from flask import request - -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import numpy as np -from PIL import Image -import paddle.fluid as fluid - -from utils.config import cfg -cfg.update_from_file("demo_server.yaml") -from models.model_builder import build_model -from models.model_builder import ModelPhase - -server = Flask(__name__) - - -# ###### global init ###### -def load_model(): - startup_prog = fluid.Program() - test_prog = fluid.Program() - pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) - fetch_list = [pred.name] - # Clone forward graph - test_prog = test_prog.clone(for_test=True) - - # Get device environment - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(startup_prog) - - ckpt_dir = cfg.TEST.TEST_MODEL - if ckpt_dir is not None: - print('load test model:', ckpt_dir) - try: - fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) - except: - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - # # Get device environment - # places = [fluid.CUDAPlace(i) for i in range(4)] - # exes = [fluid.Executor(places[i]) for i in range(4)] - # for exe in exes: - # exe.run(startup_prog) - # - # ckpt_dir = cfg.TEST.TEST_MODEL - # if ckpt_dir is not None: - # print('load test model:', ckpt_dir) - # for i in range(4): - # try: - # fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exes[i]) - # except: - # fluid.io.load_params(exes[i], ckpt_dir, main_program=test_prog) - - return fetch_list, test_prog, exe #s - - -fetch_list_diff, test_prog_diff, exe_diff = load_model() - -cfg.DATASET.INPUT_IMAGE_NUM = 1 -fetch_list_seg, test_prog_seg, exe_seg = load_model() - - -# ###### inference ###### -def normalize_image(img): - img = img.transpose((2, 0, 1)).astype('float32') / 255.0 - img_mean = np.array(cfg.MEAN).reshape((len(cfg.MEAN), 1, 1)) - img_std = np.array(cfg.STD).reshape((len(cfg.STD), 1, 1)) - img -= img_mean - img /= img_std - return img - - -def inference(img1, img2, fetch_list, test_prog, exe): - img1 = normalize_image(img1)[np.newaxis, :, :, :] - if img2 is not None: - img2 = normalize_image(img2)[np.newaxis, :, :, :] - pred, = exe.run( - program=test_prog, - feed={ - 'image1': img1, - 'image2': img2 - }, - fetch_list=fetch_list, - return_numpy=True) - - idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM - pred1, pred2 = pred[:idx], pred[ - idx:] # fluid.layers.split(pred, 2, dim=0) - res_map1 = np.squeeze(pred1[0, :, :, :]).astype(np.uint8) - res_map2 = np.squeeze(pred2[0, :, :, :]).astype(np.uint8) - - unchange_idx = np.where((res_map1 - res_map2) == 0) - diff = res_map1 * 8 + res_map2 - diff[unchange_idx] = 0 - return res_map1, res_map2, diff - else: - pred, = exe.run( - program=test_prog, - feed={'image1': img1}, - fetch_list=fetch_list, - return_numpy=True) - res_map = np.squeeze(pred[0, :, :, :]).astype(np.uint8) - return res_map - - -@server.route('/put_image', methods=['GET', 'POST']) -def listen(): - data = json.loads(request.data) - img1 = np.array(data['img1']) - img2 = None - if 'img2' in data: - img2 = np.array(data['img2']) - cfg.DATASET.INPUT_IMAGE_NUM = 2 - - res_map1, res_map2, diff = inference( - img1, img2, fetch_list_diff, test_prog_diff, exe_diff) #s[idx % 4]) - ret = { - "res_map1": res_map1.tolist(), - "res_map2": res_map2.tolist(), - "diff": diff.tolist(), - } - else: - 
cfg.DATASET.INPUT_IMAGE_NUM = 1 - res_map = inference(img1, img2, fetch_list_seg, test_prog_seg, exe_seg) - ret = {"res_map1": res_map.tolist()} - return json.dumps(ret) - - -def main(): - server.run(host='10.255.94.19', port=8000, debug=True) - - -if __name__ == '__main__': - main() diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/eval.py b/legacy/contrib/NeurIPS_SN7/pdseg/eval.py deleted file mode 100644 index c44018bfe7..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/eval.py +++ /dev/null @@ -1,219 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys -import argparse -import pprint -import numpy as np -import paddle.fluid as fluid - -from utils.config import cfg -from utils.timer import Timer, calculate_eta -from models.model_builder import build_model -from models.model_builder import ModelPhase -from reader import SegDataset -from metrics import ConfusionMatrix - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddleSeg model evalution') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument('--vis', action='store_true', default=False) - parser.add_argument( - '--vis_dir', - dest='vis_dir', - help='visual save dir', - type=str, - default='vis_out/test_public') - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess IO or not', - action='store_true', - default=False) - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def evaluate(cfg, - ckpt_dir=None, - use_gpu=False, - vis=False, - vis_dir='vis_out/test_public', - use_mpio=False, - **kwargs): - np.set_printoptions(precision=5, suppress=True) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - dataset = SegDataset( - file_list=cfg.DATASET.VAL_FILE_LIST, - mode=ModelPhase.EVAL, - data_dir=cfg.DATASET.DATA_DIR) - - fls = [] - with open(cfg.DATASET.VAL_FILE_LIST) as fr: - for line in fr.readlines(): - fls.append(line.strip().split(' ')[0]) - if vis: - assert cfg.VIS.VISINEVAL is True - if not os.path.exists(vis_dir): - os.makedirs(vis_dir) - - def data_generator(): - #TODO: check is batch reader compatitable with Windows - if use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - else: - data_gen = dataset.generator() - - for b in data_gen: - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - 
yield b[0], b[1], b[2] - else: - yield b[0], b[1], b[2], b[3] - - data_loader, avg_loss, pred, grts, masks = build_model( - test_prog, startup_prog, phase=ModelPhase.EVAL) - - data_loader.set_sample_generator( - data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) - - # Get device environment - places = fluid.cuda_places() if use_gpu else fluid.cpu_places() - place = places[0] - dev_count = len(places) - print("#Device count: {}".format(dev_count)) - - exe = fluid.Executor(place) - exe.run(startup_prog) - test_prog = test_prog.clone(for_test=True) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - if not os.path.exists(ckpt_dir): - raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir)) - - if ckpt_dir is not None: - print('load test model:', ckpt_dir) - try: - fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) - except: - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - # Use streaming confusion matrix to calculate mean_iou - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - class_num = cfg.DATASET.NUM_CLASSES - conf_mat = ConfusionMatrix(class_num, streaming=True) - fetch_list = [avg_loss.name, pred.name, grts.name, masks.name] - num_images = 0 - step = 0 - all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 - timer = Timer() - timer.start() - data_loader.start() - cnt = 0 - while True: - try: - step += 1 - loss, pred, grts, masks = exe.run( - test_prog, fetch_list=fetch_list, return_numpy=True) - if vis: - preds = np.array(pred, dtype=np.float32) - for j in range(preds.shape[0]): - if cnt > len(fls): continue - name = fls[cnt].split('/')[-1].split('.')[0] - p = np.squeeze(preds[j]) - np.save(os.path.join(vis_dir, name + '.npy'), p) - cnt += 1 - print('vis %d npy... (%d tif sample)' % (cnt, cnt // 36)) - continue - - loss = np.mean(np.array(loss)) - - num_images += pred.shape[0] - conf_mat.calculate(pred, grts, masks) - _, iou = conf_mat.mean_iou() - _, acc = conf_mat.accuracy() - fwiou = conf_mat.frequency_weighted_iou() - - speed = 1.0 / timer.elapsed_time() - - print( - "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} FWIoU={:.4f} step/sec={:.2f} | ETA {}" - .format(step, loss, acc, iou, fwiou, speed, - calculate_eta(all_step - step, speed))) - timer.restart() - sys.stdout.flush() - except fluid.core.EOFException: - break - - if vis: - return - - category_iou, avg_iou = conf_mat.mean_iou() - category_acc, avg_acc = conf_mat.accuracy() - fwiou = conf_mat.frequency_weighted_iou() - print("[EVAL]#image={} acc={:.4f} IoU={:.4f} FWIoU={:.4f}".format( - num_images, avg_acc, avg_iou, fwiou)) - print("[EVAL]Category Acc:", category_acc) - print("[EVAL]Category IoU:", category_iou) - print("[EVAL]Kappa: {:.4f}".format(conf_mat.kappa())) - - return category_iou, avg_iou, category_acc, avg_acc - - -def main(): - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - evaluate(cfg, **args.__dict__) - - -if __name__ == '__main__': - main() diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/export_model.py b/legacy/contrib/NeurIPS_SN7/pdseg/export_model.py deleted file mode 100644 index 7b7306128f..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/export_model.py +++ /dev/null @@ -1,134 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import sys -import time -import pprint -import cv2 -import argparse -import numpy as np -import paddle.fluid as fluid - -from utils.config import cfg -from models.model_builder import build_model -from models.model_builder import ModelPhase - - -def parse_args(): - parser = argparse.ArgumentParser( - description='PaddleSeg Inference Model Exporter') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def export_inference_config(): - deploy_cfg = '''DEPLOY: - USE_GPU : 1 - USE_PR : 0 - MODEL_PATH : "%s" - MODEL_FILENAME : "%s" - PARAMS_FILENAME : "%s" - EVAL_CROP_SIZE : %s - MEAN : %s - STD : %s - IMAGE_TYPE : "%s" - NUM_CLASSES : %d - CHANNELS : %d - PRE_PROCESSOR : "SegPreProcessor" - PREDICTOR_MODE : "ANALYSIS" - BATCH_SIZE : 1 - ''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME, - cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.MEAN, cfg.STD, - cfg.DATASET.IMAGE_TYPE, cfg.DATASET.NUM_CLASSES, len(cfg.STD)) - if not os.path.exists(cfg.FREEZE.SAVE_DIR): - os.mkdir(cfg.FREEZE.SAVE_DIR) - yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml') - with open(yaml_path, "w") as fp: - fp.write(deploy_cfg) - return yaml_path - - -def export_inference_model(args): - """ - Export PaddlePaddle inference model for prediction depolyment and serving. 
- """ - print("Exporting inference model...") - startup_prog = fluid.Program() - infer_prog = fluid.Program() - image, logit_out = build_model( - infer_prog, startup_prog, phase=ModelPhase.PREDICT) - - # Use CPU for exporting inference model instead of GPU - place = fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - infer_prog = infer_prog.clone(for_test=True) - - if os.path.exists(cfg.TEST.TEST_MODEL): - print('load test model:', cfg.TEST.TEST_MODEL) - try: - fluid.load(infer_prog, os.path.join(cfg.TEST.TEST_MODEL, 'model'), - exe) - except: - fluid.io.load_params( - exe, cfg.TEST.TEST_MODEL, main_program=infer_prog) - else: - print("TEST.TEST_MODEL diretory is empty!") - exit(-1) - - fluid.io.save_inference_model( - cfg.FREEZE.SAVE_DIR, - feeded_var_names=[image.name], - target_vars=[logit_out], - executor=exe, - main_program=infer_prog, - model_filename=cfg.FREEZE.MODEL_FILENAME, - params_filename=cfg.FREEZE.PARAMS_FILENAME) - print("Inference model exported!") - print("Exporting inference model config...") - deploy_cfg_path = export_inference_config() - print("Inference model saved : [%s]" % (deploy_cfg_path)) - - -def main(): - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - export_inference_model(args) - - -if __name__ == '__main__': - main() diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/loss.py b/legacy/contrib/NeurIPS_SN7/pdseg/loss.py deleted file mode 100644 index 06975f6d7a..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/loss.py +++ /dev/null @@ -1,172 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle.fluid as fluid -import numpy as np -import importlib -from utils.config import cfg - - -def softmax_with_loss(logit, - label, - ignore_mask=None, - num_classes=2, - weight=None): - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - label = fluid.layers.elementwise_min( - label, fluid.layers.assign(np.array([num_classes - 1], dtype=np.int32))) - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.reshape(logit, [-1, num_classes]) - label = fluid.layers.reshape(label, [-1, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.reshape(ignore_mask, [-1, 1]) - if weight is None: - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, - label, - ignore_index=cfg.DATASET.IGNORE_INDEX, - return_softmax=True) - else: - label = fluid.layers.squeeze(label, axes=[-1]) - label_one_hot = fluid.one_hot(input=label, depth=num_classes) - if isinstance(weight, list): - assert len( - weight - ) == num_classes, "weight length must equal num of classes" - weight = fluid.layers.assign(np.array([weight], dtype='float32')) - elif isinstance(weight, str): - assert weight.lower( - ) == 'dynamic', 'if weight is string, must be dynamic!' 
- tmp = [] - total_num = fluid.layers.cast( - fluid.layers.shape(label)[0], 'float32') - for i in range(num_classes): - cls_pixel_num = fluid.layers.reduce_sum(label_one_hot[:, i]) - ratio = total_num / (cls_pixel_num + 1) - tmp.append(ratio) - weight = fluid.layers.concat(tmp) - weight = weight / fluid.layers.reduce_sum(weight) * num_classes - elif isinstance(weight, fluid.layers.Variable): - pass - else: - raise ValueError( - 'Expect weight is a list, string or Variable, but receive {}'. - format(type(weight))) - weight = fluid.layers.reshape(weight, [1, num_classes]) - weighted_label_one_hot = fluid.layers.elementwise_mul( - label_one_hot, weight) - probs = fluid.layers.softmax(logit) - loss = fluid.layers.cross_entropy( - probs, - weighted_label_one_hot, - soft_label=True, - ignore_index=cfg.DATASET.IGNORE_INDEX) - weighted_label_one_hot.stop_gradient = True - - loss = loss * ignore_mask - avg_loss = fluid.layers.mean(loss) / fluid.layers.mean(ignore_mask) - - label.stop_gradient = True - ignore_mask.stop_gradient = True - return avg_loss - - -# to change, how to appicate ignore index and ignore mask -def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception( - "dice loss is only applicable to one channel classfication") - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.transpose(ignore_mask, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit = logit * ignore_mask - label = label * ignore_mask - reduce_dim = list(range(1, len(logit.shape))) - inse = fluid.layers.reduce_sum(logit * label, dim=reduce_dim) - dice_denominator = fluid.layers.reduce_sum( - logit, dim=reduce_dim) + fluid.layers.reduce_sum( - label, dim=reduce_dim) - dice_score = 1 - inse * 2 / (dice_denominator + epsilon) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return fluid.layers.reduce_mean(dice_score) - - -def bce_loss(logit, label, ignore_mask=None): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception("bce loss is only applicable to binary classfication") - label = fluid.layers.cast(label, 'float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logit, - label=label, - ignore_index=cfg.DATASET.IGNORE_INDEX, - normalize=True) # or False - loss = fluid.layers.reduce_sum(loss) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return loss - - -def multi_softmax_with_loss(logits, - label, - ignore_mask=None, - num_classes=2, - weight=None): - if isinstance(logits, tuple): - avg_loss = 0 - for i, logit in enumerate(logits): - if label.shape[2] != logit.shape[2] or label.shape[ - 3] != logit.shape[3]: - label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (label.astype('int32') != - cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = softmax_with_loss(logit, label, logit_mask, num_classes) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = softmax_with_loss( - logits, label, ignore_mask, num_classes, weight=weight) - return avg_loss - - -def multi_dice_loss(logits, label, ignore_mask=None): - if isinstance(logits, tuple): - avg_loss = 0 - for i, logit in enumerate(logits): - logit_label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (logit_label.astype('int32') != - 
cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = dice_loss(logit, logit_label, logit_mask) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = dice_loss(logits, label, ignore_mask) - return avg_loss - - -def multi_bce_loss(logits, label, ignore_mask=None): - if isinstance(logits, tuple): - avg_loss = 0 - for i, logit in enumerate(logits): - logit_label = fluid.layers.resize_nearest(label, logit.shape[2:]) - logit_mask = (logit_label.astype('int32') != - cfg.DATASET.IGNORE_INDEX).astype('int32') - loss = bce_loss(logit, logit_label, logit_mask) - avg_loss += cfg.MODEL.MULTI_LOSS_WEIGHT[i] * loss - else: - avg_loss = bce_loss(logits, label, ignore_mask) - return avg_loss diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/lovasz_losses.py b/legacy/contrib/NeurIPS_SN7/pdseg/lovasz_losses.py deleted file mode 100644 index a0308d06b9..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/lovasz_losses.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Lovasz-Softmax and Jaccard hinge loss in PaddlePaddle""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle.fluid as fluid -import numpy as np - - -def _cumsum(x): - y = np.array(x) - return np.cumsum(y, axis=0) - - -def create_tmp_var(name, dtype, shape): - return fluid.default_main_program().current_block().create_var( - name=name, dtype=dtype, shape=shape) - - -def lovasz_grad(gt_sorted): - """ - Computes gradient of the Lovasz extension w.r.t sorted errors - See Alg. 1 in paper - """ - gt_sorted = fluid.layers.squeeze(gt_sorted, axes=[1]) - gts = fluid.layers.reduce_sum(gt_sorted) - len_gt = fluid.layers.shape(gt_sorted) - - # Acceleration is achieved by reducing the number of calls to cumsum. - # This calculation method is equivalent to that of the original paper. - var_one = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32') - range_ = fluid.layers.range(1, len_gt + var_one, 1, 'int32') - tmp_var = create_tmp_var( - name='tmp_var', dtype=gt_sorted.dtype, shape=gt_sorted.shape) - cumsum_ = fluid.layers.py_func(func=_cumsum, x=gt_sorted, out=tmp_var) - intersection = gts - cumsum_ - union = intersection + range_ - - jaccard = 1.0 - intersection / union - jaccard0 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[1]) - jaccard1 = fluid.layers.slice(jaccard, axes=[0], starts=[1], ends=[len_gt]) - jaccard2 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[-1]) - jaccard = fluid.layers.concat([jaccard0, jaccard1 - jaccard2], axis=0) - jaccard = fluid.layers.unsqueeze(jaccard, axes=[1]) - return jaccard - - -def lovasz_hinge(logits, labels, ignore=None): - """ - Binary Lovasz hinge loss - logits: [N, C, H, W] Tensor, logits at each pixel (between -\infty and +\infty) - labels: [N, 1, H, W] Tensor, binary ground truth masks (0 or 1) - ignore: [N, 1, H, W] Tensor. 
Void class labels, ignore pixels which value=0 - """ - loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) - return loss - - -def lovasz_hinge_flat(logits, labels): - """ - Binary Lovasz hinge loss - logits: [P] Tensor, logits at each prediction (between -\infty and +\infty) - labels: [P] Tensor, binary ground truth labels (0 or 1) - """ - shape = fluid.layers.shape(logits) - y = fluid.layers.zeros_like(shape[0]) - - out_var = fluid.layers.create_tensor("float32") - with fluid.layers.control_flow.Switch() as switch: - with switch.case(fluid.layers.equal(shape[0], y)): - loss = fluid.layers.reduce_sum(logits) * 0. - fluid.layers.assign(input=loss, output=out_var) - with switch.case(fluid.layers.greater_than(shape[0], y)): - labelsf = fluid.layers.cast(labels, logits.dtype) - signs = labelsf * 2 - 1. - signs.stop_gradient = True - errors = 1.0 - fluid.layers.elementwise_mul(logits, signs) - errors_sorted, perm = fluid.layers.argsort( - errors, axis=0, descending=True) - errors_sorted.stop_gradient = False - gt_sorted = fluid.layers.gather(labelsf, perm) - - grad = lovasz_grad(gt_sorted) - grad.stop_gradient = True - loss = fluid.layers.reduce_sum( - fluid.layers.relu(errors_sorted) * grad) - fluid.layers.assign(input=loss, output=out_var) - return out_var - - -def flatten_binary_scores(scores, labels, ignore=None): - """ - Flattens predictions in the batch (binary case) - Remove labels according to 'ignore' - """ - scores = fluid.layers.reshape(scores, [-1, 1]) - labels = fluid.layers.reshape(labels, [-1, 1]) - labels.stop_gradient = True - if ignore is None: - return scores, labels - ignore = fluid.layers.cast(ignore, 'int32') - ignore_mask = fluid.layers.reshape(ignore, (-1, 1)) - indexs = fluid.layers.where(ignore_mask == 1) - indexs.stop_gradient = True - vscores = fluid.layers.gather(scores, indexs[:, 0]) - vlabels = fluid.layers.gather(labels, indexs[:, 0]) - return vscores, vlabels - - -def lovasz_softmax(probas, labels, classes='present', ignore=None): - """ - Multi-class Lovasz-Softmax loss - probas: [N, C, H, W] Tensor, class probabilities at each prediction (between 0 and 1). - labels: [N, 1, H, W] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - ignore: [N, 1, H, W] Tensor. Void class labels, ignore pixels which value=0 - """ - vprobas, vlabels = flatten_probas(probas, labels, ignore) - loss = lovasz_softmax_flat(vprobas, vlabels, classes=classes) - return loss - - -def lovasz_softmax_flat(probas, labels, classes='present'): - """ - Multi-class Lovasz-Softmax loss - probas: [P, C] Tensor, class probabilities at each prediction (between 0 and 1) - labels: [P] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. 
- """ - C = probas.shape[1] - losses = [] - present = [] - classes_to_sum = list(range(C)) if classes in ['all', 'present' - ] else classes - for c in classes_to_sum: - fg = fluid.layers.cast(labels == c, probas.dtype) - fg.stop_gradient = True - if classes == 'present': - present.append( - fluid.layers.cast(fluid.layers.reduce_sum(fg) > 0, "int64")) - if C == 1: - if len(classes_to_sum) > 1: - raise ValueError('Sigmoid output possible only with 1 class') - class_pred = probas[:, 0] - else: - class_pred = probas[:, c] - errors = fluid.layers.abs(fg - class_pred) - errors_sorted, perm = fluid.layers.argsort( - errors, axis=0, descending=True) - errors_sorted.stop_gradient = False - - fg_sorted = fluid.layers.gather(fg, perm) - fg_sorted.stop_gradient = True - - grad = lovasz_grad(fg_sorted) - grad.stop_gradient = True - loss = fluid.layers.reduce_sum(errors_sorted * grad) - - losses.append(loss) - - if len(classes_to_sum) == 1: - return losses[0] - - losses_tensor = fluid.layers.stack(losses) - if classes == 'present': - present_tensor = fluid.layers.stack(present) - index = fluid.layers.where(present_tensor == 1) - index.stop_gradient = True - losses_tensor = fluid.layers.gather(losses_tensor, index[:, 0]) - loss = fluid.layers.mean(losses_tensor) - return loss - - -def flatten_probas(probas, labels, ignore=None): - """ - Flattens predictions in the batch - """ - if len(probas.shape) == 3: - probas = fluid.layers.unsqueeze(probas, axis=[1]) - C = probas.shape[1] - probas = fluid.layers.transpose(probas, [0, 2, 3, 1]) - probas = fluid.layers.reshape(probas, [-1, C]) - labels = fluid.layers.reshape(labels, [-1, 1]) - if ignore is None: - return probas, labels - ignore = fluid.layers.cast(ignore, 'int32') - ignore_mask = fluid.layers.reshape(ignore, [-1, 1]) - indexs = fluid.layers.where(ignore_mask == 1) - indexs.stop_gradient = True - vprobas = fluid.layers.gather(probas, indexs[:, 0]) - vlabels = fluid.layers.gather(labels, indexs[:, 0]) - return vprobas, vlabels diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/metrics.py b/legacy/contrib/NeurIPS_SN7/pdseg/metrics.py deleted file mode 100644 index 34695ae740..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/metrics.py +++ /dev/null @@ -1,102 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - - -class ConfusionMatrix(object): - """ - Confusion Matrix for segmentation evaluation - """ - - def __init__(self, num_classes=2, streaming=False): - self.confusion_matrix = np.zeros([num_classes, num_classes], - dtype=np.float) - self.num_classes = num_classes - self.streaming = streaming - - def calculate(self, pred, label, ignore=None): - # If not in streaming mode, clear matrix everytime when call `calculate` - if not self.streaming: - self.zero_matrix() - - label = np.transpose(label, (0, 2, 3, 1)) - ignore = np.transpose(ignore, (0, 2, 3, 1)) - mask = np.array(ignore) == 1 - - label = np.asarray(label)[mask] - pred = np.asarray(pred)[mask] - one = np.ones_like(pred) - # Accumuate ([row=label, col=pred], 1) into sparse matrix - spm = csr_matrix((one, (label, pred)), - shape=(self.num_classes, self.num_classes)) - spm = spm.todense() - self.confusion_matrix += spm - - def zero_matrix(self): - """ Clear confusion matrix """ - self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], - dtype=np.float) - - def _iou(self): - """ - Intersection over Union (IoU) - """ - cm_diag = np.diag(self.confusion_matrix) - iou = cm_diag / (np.sum(self.confusion_matrix, axis=1) + np.sum( - self.confusion_matrix, axis=0) - cm_diag) - return iou - - def mean_iou(self): - """ - Mean Intersection over Union (MIoU) - """ - iou = self._iou() - m_iou = np.mean(iou) - return iou, m_iou - - def frequency_weighted_iou(self): - """ - Frequency Weighted Intersection over Union (FWIoU) - """ - frequency = np.sum( - self.confusion_matrix, axis=1) / np.sum(self.confusion_matrix) - iou = self._iou() - fw_iou = np.sum(frequency * iou) - return fw_iou - - def accuracy(self): - """ - Mean Pixel Accuracy (MPA) - """ - pa = np.diag(self.confusion_matrix) / np.sum( - self.confusion_matrix, axis=1) - mpa = np.mean(pa) - return pa, mpa - - def kappa(self): - """ - Kappa coefficient - """ - cm_sum = np.sum(self.confusion_matrix) - po = np.sum(np.diag(self.confusion_matrix)) / cm_sum - pe = np.dot( - np.sum(self.confusion_matrix, axis=0), - np.sum(self.confusion_matrix, axis=1)) / (cm_sum**2) - kappa = (po - pe) / (1 - pe) - return kappa diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/__init__.py deleted file mode 100644 index b9c755762d..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import models.modeling diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/__init__.py deleted file mode 100644 index ba1baeb1ac..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
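Every metric in `ConfusionMatrix` above reduces to a small reduction over the accumulated matrix (rows are ground-truth classes, columns are predictions). A toy NumPy check of IoU, mIoU, FWIoU, per-class accuracy and kappa (illustrative values only):

```python
import numpy as np

# Rows: ground truth, columns: prediction.
cm = np.array([[50.0, 10.0],
               [ 5.0, 35.0]])

diag = np.diag(cm)
iou = diag / (cm.sum(axis=1) + cm.sum(axis=0) - diag)   # per-class IoU
miou = iou.mean()

freq = cm.sum(axis=1) / cm.sum()
fwiou = (freq * iou).sum()                               # frequency-weighted IoU

pa = diag / cm.sum(axis=1)                               # per-class accuracy
mpa = pa.mean()

po = diag.sum() / cm.sum()                               # observed agreement
pe = np.dot(cm.sum(axis=0), cm.sum(axis=1)) / cm.sum() ** 2
kappa = (po - pe) / (1 - pe)

print(iou, miou, fwiou, mpa, kappa)
```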
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/mobilenet_v2.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/mobilenet_v2.py deleted file mode 100644 index 02c8df94c0..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/mobilenet_v2.py +++ /dev/null @@ -1,315 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from utils.config import cfg - -__all__ = [ - 'MobileNetV2', 'MobileNetV2_x0_25', 'MobileNetV2_x0_5', 'MobileNetV2_x1_0', - 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', 'MobileNetV2_scale' -] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class MobileNetV2(): - def __init__(self, scale=1.0, change_depth=False, output_stride=None): - self.params = train_parameters - self.scale = scale - self.change_depth = change_depth - self.bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] if change_depth == False else [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 5, 2), - (6, 64, 7, 2), - (6, 96, 5, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), - ] - self.modify_bottle_params(output_stride) - - def modify_bottle_params(self, output_stride=None): - if output_stride is not None and output_stride % 2 != 0: - raise Exception("output stride must to be even number") - if output_stride is None: - return - else: - stride = 2 - for i, layer_setting in enumerate(self.bottleneck_params_list): - t, c, n, s = layer_setting - stride = stride * s - if stride > output_stride: - s = 1 - self.bottleneck_params_list[i] = (t, c, n, s) - - def net(self, input, class_dim=1000, end_points=None, decode_points=None): - scale = self.scale - change_depth = self.change_depth - #if change_depth is True, the new depth is 1.4 times as deep as before. 
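`modify_bottle_params` above walks the bottleneck list while tracking the cumulative stride (starting at 2 for the stem convolution) and forces every further stride back to 1 once the requested `output_stride` has been reached, so deeper stages stop downsampling. A standalone sketch of that rule (illustrative; entries follow the same `(t, c, n, s)` layout):

```python
# (expansion t, output channels c, repeats n, stride of first block s)
bottlenecks = [(1, 16, 1, 1), (6, 24, 2, 2), (6, 32, 3, 2), (6, 64, 4, 2),
               (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1)]

def clamp_to_output_stride(params, output_stride):
    stride = 2  # the stem conv already downsamples by 2
    clamped = []
    for t, c, n, s in params:
        stride *= s
        if stride > output_stride:
            s = 1
        clamped.append((t, c, n, s))
    return clamped

# With output_stride=16 the later stride-2 stages are clamped to stride 1,
# so the deepest feature map stays at 1/16 of the input resolution.
print(clamp_to_output_stride(bottlenecks, 16))
```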
- bottleneck_params_list = self.bottleneck_params_list - decode_ends = dict() - - def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - #conv1 - input = self.conv_bn_layer( - input, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1, - if_act=True, - name='conv1_1') - layer_count = 1 - - #print("node test:", layer_count, input.shape) - - if check_points(layer_count, decode_points): - decode_ends[layer_count] = input - - if check_points(layer_count, end_points): - return input, decode_ends - - # bottleneck sequences - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - input, depthwise_output = self.invresi_blocks( - input=input, - in_c=in_c, - t=t, - c=int(c * scale), - n=n, - s=s, - name='conv' + str(i)) - in_c = int(c * scale) - layer_count += n - - #print("node test:", layer_count, input.shape) - if check_points(layer_count, decode_points): - decode_ends[layer_count] = depthwise_output - - if check_points(layer_count, end_points): - return input, decode_ends - - #last_conv - input = self.conv_bn_layer( - input=input, - num_filters=int(1280 * scale) if scale > 1.0 else 1280, - filter_size=1, - stride=1, - padding=0, - if_act=True, - name='conv9') - - input = fluid.layers.pool2d( - input=input, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True) - - output = fluid.layers.fc( - input=input, - size=class_dim, - param_attr=ParamAttr(name='fc10_weights'), - bias_attr=ParamAttr(name='fc10_offset')) - return output - - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + "_scale"), - bias_attr=ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - def shortcut(self, input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - def inverted_residual_unit(self, - input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = self.conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = self.conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - depthwise_output = bottleneck_conv - - linear_out = self.conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - - if ifshortcut: - out = self.shortcut(input=input, data_residual=linear_out) - return out, depthwise_output - else: - 
return linear_out, depthwise_output - - def invresi_blocks(self, input, in_c, t, c, n, s, name=None): - first_block, depthwise_output = self.inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block, depthwise_output = self.inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block, depthwise_output - - -def MobileNetV2_x0_25(): - model = MobileNetV2(scale=0.25) - return model - - -def MobileNetV2_x0_5(): - model = MobileNetV2(scale=0.5) - return model - - -def MobileNetV2_x1_0(): - model = MobileNetV2(scale=1.0) - return model - - -def MobileNetV2_x1_5(): - model = MobileNetV2(scale=1.5) - return model - - -def MobileNetV2_x2_0(): - model = MobileNetV2(scale=2.0) - return model - - -def MobileNetV2_scale(): - model = MobileNetV2(scale=1.2, change_depth=True) - return model - - -if __name__ == '__main__': - image_shape = [-1, 3, 224, 224] - image = fluid.data(name='image', shape=image_shape, dtype='float32') - model = MobileNetV2_x1_0() - logit, decode_ends = model.net(image) - #print("logit:", logit.shape) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/resnet.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/resnet.py deleted file mode 100644 index e4d09ed90e..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/resnet.py +++ /dev/null @@ -1,341 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
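The ResNet variants defined below differ only in how many residual blocks each of the four stages stacks; with a bottleneck block (3 convolutions) for the 50/101/152 variants and a basic block (2 convolutions) for 18/34, the block counts plus the standard 7x7 stem and the final FC layer reproduce the familiar layer totals. A quick sanity check of that arithmetic (illustrative only):

```python
# Blocks per stage, matching the `depth` lists in the code below.
depths = {18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3],
          101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}

def total_weighted_layers(variant):
    convs_per_block = 3 if variant >= 50 else 2   # bottleneck vs. basic block
    return 1 + sum(depths[variant]) * convs_per_block + 1   # stem + blocks + FC

for variant in sorted(depths):
    assert total_weighted_layers(variant) == variant
```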
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr - -__all__ = [ - "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152" -] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ResNet(): - def __init__(self, layers=50, scale=1.0, stem=None): - self.params = train_parameters - self.layers = layers - self.scale = scale - self.stem = stem - - def net(self, - input, - class_dim=1000, - end_points=None, - decode_points=None, - resize_points=None, - dilation_dict=None): - layers = self.layers - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format(supported_layers, layers) - - decode_ends = dict() - - def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - def get_dilated_rate(dilation_dict, idx): - if dilation_dict is None or idx not in dilation_dict: - return 1 - else: - return dilation_dict[idx] - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - - if self.stem == 'icnet' or self.stem == 'pspnet': - conv = self.conv_bn_layer( - input=input, - num_filters=int(64 * self.scale), - filter_size=3, - stride=2, - act='relu', - name="conv1_1") - conv = self.conv_bn_layer( - input=conv, - num_filters=int(64 * self.scale), - filter_size=3, - stride=1, - act='relu', - name="conv1_2") - conv = self.conv_bn_layer( - input=conv, - num_filters=int(128 * self.scale), - filter_size=3, - stride=1, - act='relu', - name="conv1_3") - else: - conv = self.conv_bn_layer( - input=input, - num_filters=int(64 * self.scale), - filter_size=7, - stride=2, - act='relu', - name="conv1") - - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - layer_count = 1 - if check_points(layer_count, decode_points): - decode_ends[layer_count] = conv - - if check_points(layer_count, end_points): - return conv, decode_ends - - if layers >= 50: - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - dilation_rate = get_dilated_rate(dilation_dict, block) - - conv = self.bottleneck_block( - input=conv, - num_filters=int(num_filters[block] * self.scale), - stride=2 - if i == 0 and block != 0 and dilation_rate == 1 else 1, - name=conv_name, - dilation=dilation_rate) - layer_count += 3 - - if check_points(layer_count, decode_points): - decode_ends[layer_count] = conv - - if check_points(layer_count, end_points): - return conv, decode_ends - - if check_points(layer_count, resize_points): - conv = self.interp( - conv, - np.ceil( - np.array(conv.shape[2:]).astype('int32') / 2)) - - pool = fluid.layers.pool2d( - input=conv, 
pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - else: - for block in range(len(depth)): - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - is_first=block == i == 0, - name=conv_name) - layer_count += 2 - if check_points(layer_count, decode_points): - decode_ends[layer_count] = conv - - if check_points(layer_count, end_points): - return conv, decode_ends - - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - return out - - def zero_padding(self, input, padding): - return fluid.layers.pad( - input, [0, 0, 0, 0, padding, padding, padding, padding]) - - def interp(self, input, out_shape): - out_shape = list(out_shape.astype("int32")) - return fluid.layers.resize_bilinear(input, out_shape=out_shape) - - def conv_bn_layer(self, - input, - num_filters, - filter_size, - stride=1, - dilation=1, - groups=1, - act=None, - name=None): - - if self.stem == 'pspnet': - bias_attr = ParamAttr(name=name + "_biases") - else: - bias_attr = False - - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2 if dilation == 1 else 0, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights"), - bias_attr=bias_attr, - name=name + '.conv2d.output.1') - - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - ) - - def shortcut(self, input, ch_out, stride, is_first, name): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1 or is_first == True: - return self.conv_bn_layer(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck_block(self, input, num_filters, stride, name, dilation=1): - if self.stem == 'pspnet' and self.layers == 101: - strides = [1, stride] - else: - strides = [stride, 1] - - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=1, - dilation=1, - stride=strides[0], - act='relu', - name=name + "_branch2a") - if dilation > 1: - conv0 = self.zero_padding(conv0, dilation) - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - dilation=dilation, - stride=strides[1], - act='relu', - name=name + "_branch2b") - conv2 = self.conv_bn_layer( - input=conv1, - num_filters=num_filters * 4, - dilation=1, - filter_size=1, - act=None, - name=name + "_branch2c") - - short = self.shortcut( - input, - num_filters * 4, - stride, - is_first=False, - name=name + "_branch1") - - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu', name=name + ".add.output.5") - - def basic_block(self, input, num_filters, stride, is_first, name): - conv0 = self.conv_bn_layer( - input=input, - num_filters=num_filters, - 
filter_size=3, - act='relu', - stride=stride, - name=name + "_branch2a") - conv1 = self.conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - act=None, - name=name + "_branch2b") - short = self.shortcut( - input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def ResNet18(): - model = ResNet(layers=18) - return model - - -def ResNet34(): - model = ResNet(layers=34) - return model - - -def ResNet50(): - model = ResNet(layers=50) - return model - - -def ResNet101(): - model = ResNet(layers=101) - return model - - -def ResNet152(): - model = ResNet(layers=152) - return model diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/vgg.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/vgg.py deleted file mode 100644 index 98b688e34d..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/vgg.py +++ /dev/null @@ -1,82 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -__all__ = ["VGGNet"] - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - -class VGGNet(): - def __init__(self, layers=16): - self.layers = layers - - def net(self, input, class_dim=1000, end_points=None, decode_points=None): - short_cuts = dict() - layers_count = 0 - layers = self.layers - vgg_spec = { - 11: ([1, 1, 2, 2, 2]), - 13: ([2, 2, 2, 2, 2]), - 16: ([2, 2, 3, 3, 3]), - 19: ([2, 2, 4, 4, 4]) - } - assert layers in vgg_spec.keys(), \ - "supported layers are {} but input layer is {}".format(vgg_spec.keys(), layers) - - nums = vgg_spec[layers] - channels = [64, 128, 256, 512, 512] - conv = input - for i in range(len(nums)): - conv = self.conv_block( - conv, channels[i], nums[i], name="conv" + str(i + 1) + "_") - layers_count += nums[i] - if check_points(layers_count, decode_points): - short_cuts[layers_count] = conv - if check_points(layers_count, end_points): - return conv, short_cuts - - return conv - - def conv_block(self, input, num_filter, groups, name=None): - conv = input - for i in range(groups): - conv = fluid.layers.conv2d( - input=conv, - num_filters=num_filter, - filter_size=3, - stride=1, - padding=1, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - name=name + str(i + 1) + "_weights"), - bias_attr=False) - return fluid.layers.pool2d( - input=conv, pool_size=2, pool_type='max', pool_stride=2) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/xception.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/xception.py deleted file mode 100644 index ba9ce972eb..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/backbone/xception.py 
+++ /dev/null @@ -1,317 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import contextlib -import paddle -import math -import paddle.fluid as fluid -from models.libs.model_libs import scope, name_scope -from models.libs.model_libs import bn, bn_relu, relu -from models.libs.model_libs import conv -from models.libs.model_libs import separate_conv - -__all__ = ['xception_65', 'xception_41', 'xception_71'] - - -def check_data(data, number): - if type(data) == int: - return [data] * number - assert len(data) == number - return data - - -def check_stride(s, os): - if s <= os: - return True - else: - return False - - -def check_points(count, points): - if points is None: - return False - else: - if isinstance(points, list): - return (True if count in points else False) - else: - return (True if count == points else False) - - -class Xception(): - def __init__(self, backbone="xception_65"): - self.bottleneck_params = self.gen_bottleneck_params(backbone) - self.backbone = backbone - - def gen_bottleneck_params(self, backbone='xception_65'): - if backbone == 'xception_65': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - elif backbone == 'xception_41': - bottleneck_params = { - "entry_flow": (3, [2, 2, 2], [128, 256, 728]), - "middle_flow": (8, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - elif backbone == 'xception_71': - bottleneck_params = { - "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]), - "middle_flow": (16, 1, 728), - "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, - 2048]]) - } - else: - raise Exception( - "xception backbont only support xception_41/xception_65/xception_71" - ) - return bottleneck_params - - def net(self, - input, - output_stride=32, - num_classes=1000, - end_points=None, - decode_points=None): - self.stride = 2 - self.block_point = 0 - self.output_stride = output_stride - self.decode_points = decode_points - self.short_cuts = dict() - with scope(self.backbone): - # Entry flow - data = self.entry_flow(input) - if check_points(self.block_point, end_points): - return data, self.short_cuts - - # Middle flow - data = self.middle_flow(data) - if check_points(self.block_point, end_points): - return data, self.short_cuts - - # Exit flow - data = self.exit_flow(data) - if check_points(self.block_point, end_points): - return data, self.short_cuts - - data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True) - data = fluid.layers.dropout(data, 0.5) - stdv = 1.0 / math.sqrt(data.shape[1] * 1.0) - with scope("logit"): - out = fluid.layers.fc( - input=data, - size=num_classes, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - name='weights', - initializer=fluid.initializer.Uniform(-stdv, 
stdv)), - bias_attr=fluid.param_attr.ParamAttr(name='bias')) - - return out - - def entry_flow(self, data): - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09)) - with scope("entry_flow"): - with scope("conv1"): - data = bn_relu( - conv( - data, 32, 3, stride=2, padding=1, - param_attr=param_attr)) - with scope("conv2"): - data = bn_relu( - conv( - data, 64, 3, stride=1, padding=1, - param_attr=param_attr)) - - # get entry flow params - block_num = self.bottleneck_params["entry_flow"][0] - strides = self.bottleneck_params["entry_flow"][1] - chns = self.bottleneck_params["entry_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("entry_flow"): - for i in range(block_num): - block_point = block_point + 1 - with scope("block" + str(i + 1)): - stride = strides[i] if check_stride(s * strides[i], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, chns[i], [1, 1, stride]) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def middle_flow(self, data): - block_num = self.bottleneck_params["middle_flow"][0] - strides = self.bottleneck_params["middle_flow"][1] - chns = self.bottleneck_params["middle_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("middle_flow"): - for i in range(block_num): - block_point = block_point + 1 - with scope("block" + str(i + 1)): - stride = strides[i] if check_stride(s * strides[i], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, chns[i], [1, 1, strides[i]], skip_conv=False) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def exit_flow(self, data): - block_num = self.bottleneck_params["exit_flow"][0] - strides = self.bottleneck_params["exit_flow"][1] - chns = self.bottleneck_params["exit_flow"][2] - strides = check_data(strides, block_num) - chns = check_data(chns, block_num) - - assert (block_num == 2) - # params to control your flow - s = self.stride - block_point = self.block_point - output_stride = self.output_stride - with scope("exit_flow"): - with scope('block1'): - block_point += 1 - stride = strides[0] if check_stride(s * strides[0], - output_stride) else 1 - data, short_cuts = self.xception_block(data, chns[0], - [1, 1, stride]) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - with scope('block2'): - block_point += 1 - stride = strides[1] if check_stride(s * strides[1], - output_stride) else 1 - data, short_cuts = self.xception_block( - data, - chns[1], [1, 1, stride], - dilation=2, - has_skip=False, - activation_fn_in_separable_conv=True) - s = s * stride - if check_points(block_point, self.decode_points): - self.short_cuts[block_point] = short_cuts[1] - - self.stride = s - self.block_point = block_point - return data - - def xception_block(self, - input, - channels, - strides=1, - filters=3, - dilation=1, - skip_conv=True, - 
has_skip=True, - activation_fn_in_separable_conv=False): - repeat_number = 3 - channels = check_data(channels, repeat_number) - filters = check_data(filters, repeat_number) - strides = check_data(strides, repeat_number) - data = input - results = [] - for i in range(repeat_number): - with scope('separable_conv' + str(i + 1)): - if not activation_fn_in_separable_conv: - data = relu(data) - data = separate_conv( - data, - channels[i], - strides[i], - filters[i], - dilation=dilation) - else: - data = separate_conv( - data, - channels[i], - strides[i], - filters[i], - dilation=dilation, - act=relu) - results.append(data) - if not has_skip: - return data, results - if skip_conv: - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal( - loc=0.0, scale=0.09)) - with scope('shortcut'): - skip = bn( - conv( - input, - channels[-1], - 1, - strides[-1], - groups=1, - padding=0, - param_attr=param_attr)) - else: - skip = input - return data + skip, results - - -def xception_65(): - model = Xception("xception_65") - return model - - -def xception_41(): - model = Xception("xception_41") - return model - - -def xception_71(): - model = Xception("xception_71") - return model - - -if __name__ == '__main__': - image_shape = [-1, 3, 224, 224] - image = fluid.data(name='image', shape=image_shape, dtype='float32') - model = xception_65() - logit = model.net(image) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/__init__.py deleted file mode 100644 index ba1baeb1ac..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/model_libs.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/model_libs.py deleted file mode 100644 index 3bee471ca4..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/libs/model_libs.py +++ /dev/null @@ -1,244 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
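`separate_conv` below factorizes a standard convolution into a per-channel (depthwise) convolution followed by a 1x1 (pointwise) convolution, which is where most of the parameter savings in the MobileNet and Xception backbones come from. A quick parameter-count comparison (illustrative; bias and batch-norm parameters ignored):

```python
def standard_conv_params(c_in, c_out, k):
    return c_in * c_out * k * k

def separable_conv_params(c_in, c_out, k):
    depthwise = c_in * k * k     # one k x k filter per input channel
    pointwise = c_in * c_out     # 1x1 conv that mixes channels
    return depthwise + pointwise

c_in, c_out, k = 128, 128, 3
std = standard_conv_params(c_in, c_out, k)
sep = separable_conv_params(c_in, c_out, k)
print(std, sep, round(std / sep, 1))   # roughly an 8-9x reduction for 3x3 kernels
```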
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -from utils.config import cfg -import contextlib - -bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) -name_scope = "" - - -@contextlib.contextmanager -def scope(name): - global name_scope - bk = name_scope - name_scope = name_scope + name + '/' - yield - name_scope = bk - - -def max_pool(input, kernel, stride, padding): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='max', - pool_stride=stride, - pool_padding=padding) - return data - - -def avg_pool(input, kernel, stride, padding=0): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='avg', - pool_stride=stride, - pool_padding=padding) - return data - - -def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): - N, C, H, W = input.shape - if C % G != 0: - # print "group can not divide channle:", C, G - for d in range(10): - for t in [d, -d]: - if G + t <= 0: continue - if C % (G + t) == 0: - G = G + t - break - if C % G == 0: - # print "use group size:", G - break - assert C % G == 0 - x = fluid.layers.group_norm( - input, - groups=G, - param_attr=param_attr, - bias_attr=bias_attr, - name=name_scope + 'group_norm') - return x - - -def bn(*args, **kargs): - if cfg.MODEL.DEFAULT_NORM_TYPE == 'bn': - with scope('BatchNorm'): - return fluid.layers.batch_norm( - *args, - epsilon=cfg.MODEL.DEFAULT_EPSILON, - momentum=cfg.MODEL.BN_MOMENTUM, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer), - moving_mean_name=name_scope + 'moving_mean', - moving_variance_name=name_scope + 'moving_variance', - **kargs) - elif cfg.MODEL.DEFAULT_NORM_TYPE == 'gn': - with scope('GroupNorm'): - return group_norm( - args[0], - cfg.MODEL.DEFAULT_GROUP_NUMBER, - eps=cfg.MODEL.DEFAULT_EPSILON, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer)) - else: - raise Exception("Unsupport norm type:" + cfg.MODEL.DEFAULT_NORM_TYPE) - - -def bn_relu(data): - return fluid.layers.relu(bn(data)) - - -def relu(data): - return fluid.layers.relu(data) - - -def conv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = fluid.ParamAttr( - name=name_scope + 'biases', - regularizer=None, - initializer=fluid.initializer.ConstantInitializer(value=0.0)) - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d(*args, **kargs) - - -def deconv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = name_scope + 'biases' - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d_transpose(*args, **kargs) - - -def separate_conv(input, channel, stride, filter, dilation=1, act=None): - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) - with scope('depthwise'): - input = conv( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - dilation=dilation, - use_cudnn=False, - param_attr=param_attr) - input = bn(input) - if act: input = 
act(input) - - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('pointwise'): - input = conv( - input, channel, 1, 1, groups=1, padding=0, param_attr=param_attr) - input = bn(input) - if act: input = act(input) - return input - - -def conv_bn_layer(input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=fluid.ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=fluid.ParamAttr(name=bn_name + "_scale"), - bias_attr=fluid.ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - -def fuse(input, - num_classes, - filter_size1=3, - filter_size2=3, - num_filters1=512, - num_filters2=256): - param_attr = fluid.ParamAttr( - name='logit_weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - - fuse1 = conv_bn_layer( - input, - filter_size1, - num_filters1, - stride=1, - padding=1, - name="fuse1", - if_act=False) - fuse1 = fluid.layers.relu(fuse1) - - fuse2 = conv_bn_layer( - fuse1, - filter_size2, - num_filters2, - stride=1, - padding=1, - name="fuse2", - if_act=False) - fuse2 = fluid.layers.relu(fuse2) - - logit = conv( - fuse2, - num_classes, - 3, - stride=1, - padding=1, - bias_attr=True, - param_attr=param_attr) - - return logit diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/model_builder.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/model_builder.py deleted file mode 100644 index a5e350f051..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/model_builder.py +++ /dev/null @@ -1,383 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import struct - -import paddle.fluid as fluid -import numpy as np -from paddle.fluid.proto.framework_pb2 import VarType - -import solver -from utils.config import cfg -from loss import multi_softmax_with_loss -from loss import multi_dice_loss -from loss import multi_bce_loss -from lovasz_losses import lovasz_hinge -from lovasz_losses import lovasz_softmax -from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn -from models.libs.model_libs import fuse - - -class ModelPhase(object): - """ - Standard name for model phase in PaddleSeg - - The following standard keys are defined: - * `TRAIN`: training mode. - * `EVAL`: testing/evaluation mode. - * `PREDICT`: prediction/inference mode. 
- * `VISUAL` : visualization mode - """ - - TRAIN = 'train' - EVAL = 'eval' - PREDICT = 'predict' - VISUAL = 'visual' - - @staticmethod - def is_train(phase): - return phase == ModelPhase.TRAIN - - @staticmethod - def is_predict(phase): - return phase == ModelPhase.PREDICT - - @staticmethod - def is_eval(phase): - return phase == ModelPhase.EVAL - - @staticmethod - def is_visual(phase): - return phase == ModelPhase.VISUAL - - @staticmethod - def is_valid_phase(phase): - """ Check valid phase """ - if ModelPhase.is_train(phase) or ModelPhase.is_predict(phase) \ - or ModelPhase.is_eval(phase) or ModelPhase.is_visual(phase): - return True - - return False - - -def seg_model(image, class_num): - model_name = cfg.MODEL.MODEL_NAME - if model_name == 'unet': - logits = unet.unet(image, class_num) - elif model_name == 'deeplabv3p': - logits = deeplab.deeplabv3p(image, class_num) - elif model_name == 'icnet': - logits = icnet.icnet(image, class_num) - elif model_name == 'pspnet': - logits = pspnet.pspnet(image, class_num) - elif model_name == 'hrnet': - logits = hrnet.hrnet(image, class_num) - elif model_name == 'fast_scnn': - logits = fast_scnn.fast_scnn(image, class_num) - else: - raise Exception( - "unknow model name, only support unet, deeplabv3p, icnet, pspnet, hrnet, fast_scnn" - ) - return logits - - -def softmax(logit): - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.softmax(logit) - logit = fluid.layers.transpose(logit, [0, 3, 1, 2]) - return logit - - -def sigmoid_to_softmax(logit): - """ - one channel to two channel - """ - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit_back = 1 - logit - logit = fluid.layers.concat([logit_back, logit], axis=-1) - logit = fluid.layers.transpose(logit, [0, 3, 1, 2]) - return logit - - -def test_aug_inv(logits_split): - ops = cfg.TEST.TEST_AUG_FLIP_OPS + cfg.TEST.TEST_AUG_ROTATE_OPS - for i, logits in enumerate(logits_split[1:]): - if type(ops[i]) is str: - if ops[i][0] == 'h': - logits_split[i + 1] = fluid.layers.flip(logits, [2]) - elif ops[i][0] == 'v': - logits_split[i + 1] = fluid.layers.flip(logits, [3]) - elif ops[i][0] == 'm': - logits_split[i + 1] = fluid.layers.transpose( - logits, [0, 1, 3, 2]) - else: - rot90 = fluid.layers.flip( - fluid.layers.transpose(logits, [0, 1, 3, 2]), [2]) - rot180 = fluid.layers.flip( - fluid.layers.transpose(rot90, [0, 1, 3, 2]), [2]) - logits_split[i + 1] = fluid.layers.transpose( - rot180, [0, 1, 3, 2]) - else: - times = (360 - ops[i]) // 90 - for _ in range(times): - logits = fluid.layers.flip( - fluid.layers.transpose(logits, [0, 1, 3, 2]), [2]) - logits_split[i + 1] = fluid.layers.transpose(logits, [0, 1, 3, 2]) - - logits = fluid.layers.stack( - logits_split, axis=0) # channel_mul, batch_size, 1, h, w - logits = fluid.layers.reduce_mean(logits, dim=0) # batch_size, 1, h, w - return logits - - -def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): - if not ModelPhase.is_valid_phase(phase): - raise ValueError("ModelPhase {} is not valid!".format(phase)) - if ModelPhase.is_train(phase): - width = cfg.TRAIN_CROP_SIZE[0] - height = cfg.TRAIN_CROP_SIZE[1] - else: - width = cfg.EVAL_CROP_SIZE[0] - height = cfg.EVAL_CROP_SIZE[1] - - channel_mul = 1 - if ModelPhase.is_eval(phase) and cfg.TEST.TEST_AUG: - channel_mul = len(cfg.TEST.TEST_AUG_FLIP_OPS) + len( - cfg.TEST.TEST_AUG_ROTATE_OPS) + 1 - image1_shape = [-1, cfg.DATASET.DATA_DIM * channel_mul, height, width] - - grt_shape = [-1, 1, height, width] - class_num = 
cfg.DATASET.NUM_CLASSES - - with fluid.program_guard(main_prog, start_prog): - with fluid.unique_name.guard(): - # 在导出模型的时候,增加图像标准化预处理,减小预测部署时图像的处理流程 - # 预测部署时只须对输入图像增加batch_size维度即可 - image1 = fluid.data( - name='image1', shape=image1_shape, dtype='float32') - image2 = None - if cfg.DATASET.INPUT_IMAGE_NUM == 2: - image2_shape = [ - -1, cfg.DATASET.DATA_DIM * channel_mul, height, width - ] - image2 = fluid.data( - name='image2', shape=image2_shape, dtype='float32') - label = fluid.data(name='label', shape=grt_shape, dtype='int32') - mask = fluid.data(name='mask', shape=grt_shape, dtype='int32') - - # use DataLoader when doing traning and evaluation - if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[image1, label, mask], - capacity=cfg.DATALOADER.BUF_SIZE, - iterable=False, - use_double_buffer=True) - else: - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[image1, image2, label, mask], - capacity=cfg.DATALOADER.BUF_SIZE, - iterable=False, - use_double_buffer=True) - - loss_type = cfg.SOLVER.LOSS - if not isinstance(loss_type, list): - loss_type = list(loss_type) - - # lovasz_hinge_loss或dice_loss或bce_loss只适用两类分割中 - if class_num > 2 and (("lovasz_hinge_loss" in loss_type) or - ("dice_loss" in loss_type) or - ("bce_loss" in loss_type)): - raise Exception( - "lovasz hinge loss, dice loss and bce loss are only applicable to binary classfication." - ) - - # 在两类分割情况下,当loss函数选择lovasz_hinge_loss或dice_loss或bce_loss的时候,最后logit输出通道数设置为1 - if ("dice_loss" in loss_type) or ("bce_loss" in loss_type) or ( - "lovasz_hinge_loss" in loss_type): - class_num = 1 - if ("softmax_loss" in loss_type) or ( - "lovasz_softmax_loss" in loss_type): - raise Exception( - "softmax loss or lovasz softmax loss can not combine with bce loss or dice loss or lovasz hinge loss." 
- ) - cfg.PHASE = phase - - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - if cfg.TEST.TEST_AUG: - image_split = fluid.layers.split( - image1, channel_mul, - dim=1) # batch_size, 3 x channel_mul, h, w - image1 = fluid.layers.concat( - image_split, - axis=0) # channel_mul * batch_size, 3, h, w - logits = seg_model(image1, class_num) - logits_split = fluid.layers.split( - logits, channel_mul, dim=0) - logits = test_aug_inv(logits_split) - else: - logits = seg_model(image1, class_num) - - else: - if cfg.TEST.TEST_AUG: - image_split = fluid.layers.split( - image1, channel_mul, - dim=1) # batch_size, 3 x channel_mul, h, w - image1 = fluid.layers.concat( - image_split, - axis=0) # channel_mul * batch_size, 3, h, w - logits1 = seg_model(image1, class_num) - logits1_split = fluid.layers.split( - logits1, channel_mul, dim=0) - logits1 = test_aug_inv(logits1_split) - - image_split = fluid.layers.split( - image2, channel_mul, - dim=1) # batch_size, 3 x channel_mul, h, w - image2 = fluid.layers.concat( - image_split, - axis=0) # channel_mul * batch_size, 3, h, w - logits2 = seg_model(image2, class_num) - logits2_split = fluid.layers.split( - logits2, channel_mul, dim=0) - logits2 = test_aug_inv(logits2_split) - else: - logits1 = seg_model(image1, class_num) - logits2 = seg_model(image2, class_num) - - if ModelPhase.is_visual(phase) and cfg.VIS.SEG_FOR_CD: - logits = fluid.layers.concat([logits1, logits2], axis=0) - else: - logits = fluid.layers.concat([logits1, logits2], axis=1) - logits = fuse(logits, class_num) - - # 根据选择的loss函数计算相应的损失函数 - if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - loss_valid = False - avg_loss_list = [] - valid_loss = [] - if "softmax_loss" in loss_type: - weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT - avg_loss_list.append( - multi_softmax_with_loss(logits, label, mask, class_num, - weight)) - loss_valid = True - valid_loss.append("softmax_loss") - if "dice_loss" in loss_type: - avg_loss_list.append(multi_dice_loss(logits, label, mask)) - loss_valid = True - valid_loss.append("dice_loss") - if "bce_loss" in loss_type: - avg_loss_list.append(multi_bce_loss(logits, label, mask)) - loss_valid = True - valid_loss.append("bce_loss") - if "lovasz_hinge_loss" in loss_type: - avg_loss_list.append( - lovasz_hinge(logits, label, ignore=mask)) - loss_valid = True - valid_loss.append("lovasz_hinge_loss") - if "lovasz_softmax_loss" in loss_type: - probas = fluid.layers.softmax(logits, axis=1) - avg_loss_list.append( - lovasz_softmax(probas, label, ignore=mask)) - loss_valid = True - valid_loss.append("lovasz_softmax_loss") - if not loss_valid: - raise Exception( - "SOLVER.LOSS: {} is set wrong. it should " - "include one of (softmax_loss, bce_loss, dice_loss, lovasz_hinge_loss, lovasz_softmax_loss) at least" - " example: ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss'], ['lovasz_hinge_loss','bce_loss'], ['lovasz_softmax_loss','softmax_loss']" - .format(cfg.SOLVER.LOSS)) - - invalid_loss = [x for x in loss_type if x not in valid_loss] - if len(invalid_loss) > 0: - print( - "Warning: the loss {} you set is invalid. it will not be included in loss computed." - .format(invalid_loss)) - - avg_loss = 0 - for i in range(0, len(avg_loss_list)): - loss_name = valid_loss[i].upper() - loss_weight = eval('cfg.SOLVER.LOSS_WEIGHT.' 
+ loss_name) - avg_loss += loss_weight * avg_loss_list[i] - - #get pred result in original size - if isinstance(logits, tuple): - logit = logits[0] - else: - logit = logits - - if logit.shape[2:] != label.shape[2:]: - logit = fluid.layers.resize_bilinear(logit, label.shape[2:]) - - # return image input and logit output for inference graph prune - if ModelPhase.is_predict(phase): - # 两类分割中,使用lovasz_hinge_loss或dice_loss或bce_loss返回的logit为单通道,进行到两通道的变换 - if class_num == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = softmax(logit) - - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - return image1, logit - else: - return image1, image2, logit - - if class_num == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if ModelPhase.is_visual(phase): - if class_num == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = softmax(logit) - pred, _ = fluid.layers.split(logit, 2, dim=1) - return pred, logit - - if ModelPhase.is_eval(phase): - if cfg.VIS.VISINEVAL: - logit = softmax(logit) - pred, _ = fluid.layers.split(logit, 2, dim=1) - return data_loader, avg_loss, pred, label, mask - - if ModelPhase.is_train(phase): - optimizer = solver.Solver(main_prog, start_prog) - decayed_lr = optimizer.optimise(avg_loss) - return data_loader, avg_loss, decayed_lr, pred, label, mask - - -def to_int(string, dest="I"): - return struct.unpack(dest, string)[0] - - -def parse_shape_from_file(filename): - with open(filename, "rb") as file: - version = file.read(4) - lod_level = to_int(file.read(8), dest="Q") - for i in range(lod_level): - _size = to_int(file.read(8), dest="Q") - _ = file.read(_size) - version = file.read(4) - tensor_desc_size = to_int(file.read(4)) - tensor_desc = VarType.TensorDesc() - tensor_desc.ParseFromString(file.read(tensor_desc_size)) - return tuple(tensor_desc.dims) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/__init__.py deleted file mode 100644 index ba1baeb1ac..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/deeplab.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/deeplab.py deleted file mode 100644 index 7977148371..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/deeplab.py +++ /dev/null @@ -1,275 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import contextlib -import paddle -import paddle.fluid as fluid -from utils.config import cfg -from models.libs.model_libs import scope, name_scope -from models.libs.model_libs import bn, bn_relu, relu -from models.libs.model_libs import conv, conv_bn_layer -from models.libs.model_libs import separate_conv -from models.backbone.mobilenet_v2 import MobileNetV2 as mobilenet_backbone -from models.backbone.xception import Xception as xception_backbone - - -def encoder(input): - # 编码器配置,采用ASPP架构,pooling + 1x1_conv + 三个不同尺度的空洞卷积并行, concat后1x1conv - # ASPP_WITH_SEP_CONV:默认为真,使用depthwise可分离卷积,否则使用普通卷积 - # OUTPUT_STRIDE: 下采样倍数,8或16,决定aspp_ratios大小 - # aspp_ratios:ASPP模块空洞卷积的采样率 - - if cfg.MODEL.DEEPLAB.OUTPUT_STRIDE == 16: - aspp_ratios = [6, 12, 18] - elif cfg.MODEL.DEEPLAB.OUTPUT_STRIDE == 8: - aspp_ratios = [12, 24, 36] - else: - raise Exception("deeplab only support stride 8 or 16") - - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('encoder'): - channel = 256 - with scope("image_pool"): - image_avg = fluid.layers.reduce_mean(input, [2, 3], keep_dim=True) - image_avg = bn_relu( - conv( - image_avg, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - image_avg = fluid.layers.resize_bilinear(image_avg, input.shape[2:]) - - with scope("aspp0"): - aspp0 = bn_relu( - conv( - input, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - with scope("aspp1"): - if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV: - aspp1 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[0], act=relu) - else: - aspp1 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[0], - padding=aspp_ratios[0], - param_attr=param_attr)) - with scope("aspp2"): - if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV: - aspp2 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[1], act=relu) - else: - aspp2 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[1], - padding=aspp_ratios[1], - param_attr=param_attr)) - with scope("aspp3"): - if cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV: - aspp3 = separate_conv( - input, channel, 1, 3, dilation=aspp_ratios[2], act=relu) - else: - aspp3 = bn_relu( - conv( - input, - channel, - stride=1, - filter_size=3, - dilation=aspp_ratios[2], - padding=aspp_ratios[2], - param_attr=param_attr)) - with scope("concat"): - data = fluid.layers.concat([image_avg, aspp0, aspp1, aspp2, aspp3], - axis=1) - data = bn_relu( - conv( - data, - channel, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - data = fluid.layers.dropout(data, 0.9) - return data - - -def decoder(encode_data, decode_shortcut): - # 解码器配置 - # encode_data:编码器输出 - # decode_shortcut: 从backbone引出的分支, resize后与encode_data concat - # DECODER_USE_SEP_CONV: 默认为真,则concat后连接两个可分离卷积,否则为普通卷积 - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - 
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('decoder'): - with scope('concat'): - decode_shortcut = bn_relu( - conv( - decode_shortcut, - 48, - 1, - 1, - groups=1, - padding=0, - param_attr=param_attr)) - - encode_data = fluid.layers.resize_bilinear( - encode_data, decode_shortcut.shape[2:]) - encode_data = fluid.layers.concat([encode_data, decode_shortcut], - axis=1) - if cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV: - with scope("separable_conv1"): - encode_data = separate_conv( - encode_data, 256, 1, 3, dilation=1, act=relu) - with scope("separable_conv2"): - encode_data = separate_conv( - encode_data, 256, 1, 3, dilation=1, act=relu) - else: - with scope("decoder_conv1"): - encode_data = bn_relu( - conv( - encode_data, - 256, - stride=1, - filter_size=3, - dilation=1, - padding=1, - param_attr=param_attr)) - with scope("decoder_conv2"): - encode_data = bn_relu( - conv( - encode_data, - 256, - stride=1, - filter_size=3, - dilation=1, - padding=1, - param_attr=param_attr)) - return encode_data - - -def mobilenetv2(input): - # Backbone: mobilenetv2结构配置 - # DEPTH_MULTIPLIER: mobilenetv2的scale设置,默认1.0 - # OUTPUT_STRIDE:下采样倍数 - # end_points: mobilenetv2的block数 - # decode_point: 从mobilenetv2中引出分支所在block数, 作为decoder输入 - scale = cfg.MODEL.DEEPLAB.DEPTH_MULTIPLIER - output_stride = cfg.MODEL.DEEPLAB.OUTPUT_STRIDE - model = mobilenet_backbone(scale=scale, output_stride=output_stride) - end_points = 18 - decode_point = 4 - data, decode_shortcuts = model.net( - input, end_points=end_points, decode_points=decode_point) - decode_shortcut = decode_shortcuts[decode_point] - return data, decode_shortcut - - -def xception(input): - # Backbone: Xception结构配置, xception_65, xception_41, xception_71三种可选 - # decode_point: 从Xception中引出分支所在block数,作为decoder输入 - # end_point:Xception的block数 - cfg.MODEL.DEFAULT_EPSILON = 1e-3 - model = xception_backbone(cfg.MODEL.DEEPLAB.BACKBONE) - backbone = cfg.MODEL.DEEPLAB.BACKBONE - output_stride = cfg.MODEL.DEEPLAB.OUTPUT_STRIDE - if '65' in backbone: - decode_point = 2 - end_points = 21 - if '41' in backbone: - decode_point = 2 - end_points = 13 - if '71' in backbone: - decode_point = 3 - end_points = 23 - data, decode_shortcuts = model.net( - input, - output_stride=output_stride, - end_points=end_points, - decode_points=decode_point) - decode_shortcut = decode_shortcuts[decode_point] - return data, decode_shortcut - - -def deeplabv3p(img, num_classes): - # Backbone设置:xception 或 mobilenetv2 - if 'xception' in cfg.MODEL.DEEPLAB.BACKBONE: - data, decode_shortcut = xception(img) - elif 'mobilenet' in cfg.MODEL.DEEPLAB.BACKBONE: - data, decode_shortcut = mobilenetv2(img) - else: - raise Exception("deeplab only support xception and mobilenet backbone") - - # 编码器解码器设置 - cfg.MODEL.DEFAULT_EPSILON = 1e-5 - if cfg.MODEL.DEEPLAB.ENCODER_WITH_ASPP: - data = encoder(data) - if cfg.MODEL.DEEPLAB.ENABLE_DECODER: - data = decoder(data, decode_shortcut) - - # 根据类别数设置最后一个卷积层输出,并resize到图片原始尺寸 - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - with scope('logit'): - with fluid.name_scope('last_conv'): - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - logit = conv( - data, - num_classes, - 1, - stride=1, - padding=0, - bias_attr=True, - param_attr=param_attr) - else: - logit = conv_bn_layer( - data, - 3, - 256, - stride=1, - padding=1, - if_act=False, - name="fuse0") - logit = fluid.layers.relu(logit) - logit = 
fluid.layers.resize_bilinear(logit, img.shape[2:]) - - return logit diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/fast_scnn.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/fast_scnn.py deleted file mode 100644 index f9f364e9b0..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/fast_scnn.py +++ /dev/null @@ -1,304 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from models.libs.model_libs import scope -from models.libs.model_libs import bn, bn_relu, relu, conv_bn_layer -from models.libs.model_libs import conv, avg_pool -from models.libs.model_libs import separate_conv -from utils.config import cfg - - -def learning_to_downsample(x, dw_channels1=32, dw_channels2=48, - out_channels=64): - x = relu(bn(conv(x, dw_channels1, 3, 2))) - with scope('dsconv1'): - x = separate_conv( - x, dw_channels2, stride=2, filter=3, act=fluid.layers.relu) - with scope('dsconv2'): - x = separate_conv( - x, out_channels, stride=2, filter=3, act=fluid.layers.relu) - return x - - -def shortcut(input, data_residual): - return fluid.layers.elementwise_add(input, data_residual) - - -def dropout2d(input, prob, is_train=False): - if not is_train: - return input - channels = input.shape[1] - keep_prob = 1.0 - prob - shape = fluid.layers.shape(input) - random_tensor = keep_prob + fluid.layers.uniform_random( - [shape[0], channels, 1, 1], min=0., max=1.) 
- binary_tensor = fluid.layers.floor(random_tensor) - output = input / keep_prob * binary_tensor - return output - - -def inverted_residual_unit(input, - num_in_filter, - num_filters, - ifshortcut, - stride, - filter_size, - padding, - expansion_factor, - name=None): - num_expfilter = int(round(num_in_filter * expansion_factor)) - - channel_expand = conv_bn_layer( - input=input, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=True, - name=name + '_expand') - - bottleneck_conv = conv_bn_layer( - input=channel_expand, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - if_act=True, - name=name + '_dwise', - use_cudnn=False) - - depthwise_output = bottleneck_conv - - linear_out = conv_bn_layer( - input=bottleneck_conv, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1, - if_act=False, - name=name + '_linear') - - if ifshortcut: - out = shortcut(input=input, data_residual=linear_out) - return out, depthwise_output - else: - return linear_out, depthwise_output - - -def inverted_blocks(input, in_c, t, c, n, s, name=None): - first_block, depthwise_output = inverted_residual_unit( - input=input, - num_in_filter=in_c, - num_filters=c, - ifshortcut=False, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_1') - - last_residual_block = first_block - last_c = c - - for i in range(1, n): - last_residual_block, depthwise_output = inverted_residual_unit( - input=last_residual_block, - num_in_filter=last_c, - num_filters=c, - ifshortcut=True, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t, - name=name + '_' + str(i + 1)) - return last_residual_block, depthwise_output - - -def psp_module(input, out_features): - - cat_layers = [] - sizes = (1, 2, 3, 6) - for size in sizes: - psp_name = "psp" + str(size) - with scope(psp_name): - pool = fluid.layers.adaptive_pool2d( - input, - pool_size=[size, size], - pool_type='avg', - name=psp_name + '_adapool') - data = conv( - pool, - out_features, - filter_size=1, - bias_attr=False, - name=psp_name + '_conv') - data_bn = bn(data, act='relu') - interp = fluid.layers.resize_bilinear( - data_bn, - out_shape=input.shape[2:], - name=psp_name + '_interp', - align_mode=0) - cat_layers.append(interp) - cat_layers = [input] + cat_layers - out = fluid.layers.concat(cat_layers, axis=1, name='psp_cat') - - return out - - -class FeatureFusionModule: - """Feature fusion module""" - - def __init__(self, - higher_in_channels, - lower_in_channels, - out_channels, - scale_factor=4): - self.higher_in_channels = higher_in_channels - self.lower_in_channels = lower_in_channels - self.out_channels = out_channels - self.scale_factor = scale_factor - - def net(self, higher_res_feature, lower_res_feature): - h, w = higher_res_feature.shape[2:] - lower_res_feature = fluid.layers.resize_bilinear( - lower_res_feature, [h, w], align_mode=0) - - with scope('dwconv'): - lower_res_feature = relu( - bn(conv(lower_res_feature, self.out_channels, - 1))) #(lower_res_feature) - with scope('conv_lower_res'): - lower_res_feature = bn( - conv(lower_res_feature, self.out_channels, 1, bias_attr=True)) - with scope('conv_higher_res'): - higher_res_feature = bn( - conv(higher_res_feature, self.out_channels, 1, bias_attr=True)) - out = higher_res_feature + lower_res_feature - - return relu(out) - - -class GlobalFeatureExtractor(): - """Global feature extractor module""" - - def __init__(self, - in_channels=64, - 
block_channels=(64, 96, 128), - out_channels=128, - t=6, - num_blocks=(3, 3, 3)): - self.in_channels = in_channels - self.block_channels = block_channels - self.out_channels = out_channels - self.t = t - self.num_blocks = num_blocks - - def net(self, x): - x, _ = inverted_blocks(x, self.in_channels, self.t, - self.block_channels[0], self.num_blocks[0], 2, - 'inverted_block_1') - x, _ = inverted_blocks(x, self.block_channels[0], self.t, - self.block_channels[1], self.num_blocks[1], 2, - 'inverted_block_2') - x, _ = inverted_blocks(x, self.block_channels[1], self.t, - self.block_channels[2], self.num_blocks[2], 1, - 'inverted_block_3') - x = psp_module(x, self.block_channels[2] // 4) - with scope('out'): - x = relu(bn(conv(x, self.out_channels, 1))) - return x - - -class Classifier: - """Classifier""" - - def __init__(self, dw_channels, num_classes, stride=1): - self.dw_channels = dw_channels - self.num_classes = num_classes - self.stride = stride - - def net(self, x): - with scope('dsconv1'): - x = separate_conv( - x, - self.dw_channels, - stride=self.stride, - filter=3, - act=fluid.layers.relu) - with scope('dsconv2'): - x = separate_conv( - x, - self.dw_channels, - stride=self.stride, - filter=3, - act=fluid.layers.relu) - - x = dropout2d(x, 0.1, is_train=cfg.PHASE == 'train') - x = conv(x, self.num_classes, 1, bias_attr=True) - return x - - -def aux_layer(x, num_classes): - x = relu(bn(conv(x, 32, 3, padding=1))) - x = dropout2d(x, 0.1, is_train=(cfg.PHASE == 'train')) - with scope('logit'): - x = conv(x, num_classes, 1, bias_attr=True) - return x - - -def fast_scnn(img, num_classes): - size = img.shape[2:] - classifier = Classifier(128, num_classes) - - global_feature_extractor = GlobalFeatureExtractor(64, [64, 96, 128], 128, 6, - [3, 3, 3]) - feature_fusion = FeatureFusionModule(64, 128, 128) - - with scope('learning_to_downsample'): - higher_res_features = learning_to_downsample(img, 32, 48, 64) - with scope('global_feature_extractor'): - lower_res_feature = global_feature_extractor.net(higher_res_features) - with scope('feature_fusion'): - x = feature_fusion.net(higher_res_features, lower_res_feature) - with scope('classifier'): - logit = classifier.net(x) - logit = fluid.layers.resize_bilinear(logit, size, align_mode=0) - - if len(cfg.MODEL.MULTI_LOSS_WEIGHT) == 3: - with scope('aux_layer_higher'): - higher_logit = aux_layer(higher_res_features, num_classes) - higher_logit = fluid.layers.resize_bilinear( - higher_logit, size, align_mode=0) - with scope('aux_layer_lower'): - lower_logit = aux_layer(lower_res_feature, num_classes) - lower_logit = fluid.layers.resize_bilinear( - lower_logit, size, align_mode=0) - return logit, higher_logit, lower_logit - elif len(cfg.MODEL.MULTI_LOSS_WEIGHT) == 2: - with scope('aux_layer_higher'): - higher_logit = aux_layer(higher_res_features, num_classes) - higher_logit = fluid.layers.resize_bilinear( - higher_logit, size, align_mode=0) - return logit, higher_logit - - return logit diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/hrnet.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/hrnet.py deleted file mode 100644 index 21624c41f7..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/hrnet.py +++ /dev/null @@ -1,309 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr - -from utils.config import cfg - - -def conv_bn_layer(input, - filter_size, - num_filters, - stride=1, - padding=1, - num_groups=1, - if_act=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=num_groups, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr( - name=bn_name + "_scale", - initializer=fluid.initializer.Constant(1.0)), - bias_attr=ParamAttr( - name=bn_name + "_offset", - initializer=fluid.initializer.Constant(0.0)), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - bn = fluid.layers.relu(bn) - return bn - - -def basic_block(input, num_filters, stride=1, downsample=False, name=None): - residual = input - conv = conv_bn_layer( - input=input, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv1') - conv = conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - if_act=False, - name=name + '_conv2') - if downsample: - residual = conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - -def bottleneck_block(input, num_filters, stride=1, downsample=False, name=None): - residual = input - conv = conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - name=name + '_conv1') - conv = conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv2') - conv = conv_bn_layer( - input=conv, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_conv3') - if downsample: - residual = conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - -def fuse_layers(x, channels, multi_scale_output=True, name=None): - out = [] - for i in range(len(channels) if multi_scale_output else 1): - residual = x[i] - shape = residual.shape - width = shape[-1] - height = shape[-2] - for j in range(len(channels)): - if j > i: - y = conv_bn_layer( - x[j], - filter_size=1, - num_filters=channels[i], - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) - y = fluid.layers.resize_bilinear( - input=y, out_shape=[height, width]) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - elif j < i: - y = x[j] - for k in range(i - j): - if k == i - j - 1: - y = conv_bn_layer( - y, - filter_size=3, - num_filters=channels[i], - stride=2, - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + 
str(k + 1)) - else: - y = conv_bn_layer( - y, - filter_size=3, - num_filters=channels[j], - stride=2, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1)) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - - residual = fluid.layers.relu(residual) - out.append(residual) - return out - - -def branches(x, block_num, channels, name=None): - out = [] - for i in range(len(channels)): - residual = x[i] - for j in range(block_num): - residual = basic_block( - residual, - channels[i], - name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1)) - out.append(residual) - return out - - -def high_resolution_module(x, channels, multi_scale_output=True, name=None): - residual = branches(x, 4, channels, name=name) - out = fuse_layers( - residual, channels, multi_scale_output=multi_scale_output, name=name) - return out - - -def transition_layer(x, in_channels, out_channels, name=None): - num_in = len(in_channels) - num_out = len(out_channels) - out = [] - for i in range(num_out): - if i < num_in: - if in_channels[i] != out_channels[i]: - residual = conv_bn_layer( - x[i], - filter_size=3, - num_filters=out_channels[i], - name=name + '_layer_' + str(i + 1)) - out.append(residual) - else: - out.append(x[i]) - else: - residual = conv_bn_layer( - x[-1], - filter_size=3, - num_filters=out_channels[i], - stride=2, - name=name + '_layer_' + str(i + 1)) - out.append(residual) - return out - - -def stage(x, num_modules, channels, multi_scale_output=True, name=None): - out = x - for i in range(num_modules): - if i == num_modules - 1 and multi_scale_output == False: - out = high_resolution_module( - out, - channels, - multi_scale_output=False, - name=name + '_' + str(i + 1)) - else: - out = high_resolution_module( - out, channels, name=name + '_' + str(i + 1)) - - return out - - -def layer1(input, name=None): - conv = input - for i in range(4): - conv = bottleneck_block( - conv, - num_filters=64, - downsample=True if i == 0 else False, - name=name + '_' + str(i + 1)) - return conv - - -def high_resolution_net(input, num_classes): - - channels_2 = cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS - channels_3 = cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS - channels_4 = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS - - num_modules_2 = cfg.MODEL.HRNET.STAGE2.NUM_MODULES - num_modules_3 = cfg.MODEL.HRNET.STAGE3.NUM_MODULES - num_modules_4 = cfg.MODEL.HRNET.STAGE4.NUM_MODULES - - x = conv_bn_layer( - input=input, - filter_size=3, - num_filters=64, - stride=2, - if_act=True, - name='layer1_1') - x = conv_bn_layer( - input=x, - filter_size=3, - num_filters=64, - stride=2, - if_act=True, - name='layer1_2') - - la1 = layer1(x, name='layer2') - tr1 = transition_layer([la1], [256], channels_2, name='tr1') - st2 = stage(tr1, num_modules_2, channels_2, name='st2') - tr2 = transition_layer(st2, channels_2, channels_3, name='tr2') - st3 = stage(tr2, num_modules_3, channels_3, name='st3') - tr3 = transition_layer(st3, channels_3, channels_4, name='tr3') - st4 = stage(tr3, num_modules_4, channels_4, name='st4') - - # upsample - shape = st4[0].shape - height, width = shape[-2], shape[-1] - st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=[height, width]) - st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=[height, width]) - st4[3] = fluid.layers.resize_bilinear(st4[3], out_shape=[height, width]) - - out = fluid.layers.concat(st4, axis=1) - last_channels = sum(channels_4) - - out = conv_bn_layer( - input=out, - filter_size=1, - num_filters=last_channels, - stride=1, - if_act=True, - name='conv-2') - out 
= fluid.layers.conv2d( - input=out, - num_filters=num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name='conv-1_weights'), - bias_attr=False) - - out = fluid.layers.resize_bilinear(out, input.shape[2:]) - - return out - - -def hrnet(input, num_classes): - logit = high_resolution_net(input, num_classes) - return logit - - -if __name__ == '__main__': - image_shape = [-1, 3, 769, 769] - image = fluid.data(name='image', shape=image_shape, dtype='float32') - logit = hrnet(image, 4) - print("logit:", logit.shape) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/icnet.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/icnet.py deleted file mode 100644 index ad9f1b250e..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/icnet.py +++ /dev/null @@ -1,197 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from utils.config import cfg -from models.libs.model_libs import scope -from models.libs.model_libs import bn, avg_pool, conv -from models.backbone.resnet import ResNet as resnet_backbone -import numpy as np - - -def interp(input, out_shape): - out_shape = list(out_shape.astype("int32")) - return fluid.layers.resize_bilinear(input, out_shape=out_shape) - - -def pyramis_pooling(input, input_shape): - shape = np.ceil(input_shape / 32).astype("int32") - h, w = shape - pool1 = avg_pool(input, [h, w], [h, w]) - pool1_interp = interp(pool1, shape) - pool2 = avg_pool(input, [h // 2, w // 2], [h // 2, w // 2]) - pool3 = avg_pool(input, [h // 3, w // 3], [h // 3, w // 3]) - pool4 = avg_pool(input, [h // 4, w // 4], [h // 4, w // 4]) - # official caffe repo eval use following hyparam - # pool2 = avg_pool(input, [17, 33], [16, 32]) - # pool3 = avg_pool(input, [13, 25], [10, 20]) - # pool4 = avg_pool(input, [8, 15], [5, 10]) - pool2_interp = interp(pool2, shape) - pool3_interp = interp(pool3, shape) - pool4_interp = interp(pool4, shape) - conv5_3_sum = input + pool4_interp + pool3_interp + pool2_interp + pool1_interp - return conv5_3_sum - - -def zero_padding(input, padding): - return fluid.layers.pad(input, - [0, 0, 0, 0, padding, padding, padding, padding]) - - -def sub_net_4(input, input_shape): - tmp = pyramis_pooling(input, input_shape) - with scope("conv5_4_k1"): - tmp = conv(tmp, 256, 1, 1) - tmp = bn(tmp, act='relu') - tmp = interp(tmp, out_shape=np.ceil(input_shape / 16)) - return tmp - - -def sub_net_2(input): - with scope("conv3_1_sub2_proj"): - tmp = conv(input, 128, 1, 1) - tmp = bn(tmp) - return tmp - - -def sub_net_1(input): - with scope("conv1_sub1"): - tmp = conv(input, 32, 3, 2, padding=1) - tmp = bn(tmp, act='relu') - with scope("conv2_sub1"): - tmp = conv(tmp, 32, 3, 2, padding=1) - tmp = bn(tmp, act='relu') - with scope("conv3_sub1"): - tmp = conv(tmp, 64, 3, 2, padding=1) - 
tmp = bn(tmp, act='relu') - with scope("conv3_sub1_proj"): - tmp = conv(tmp, 128, 1, 1) - tmp = bn(tmp) - return tmp - - -def CCF24(sub2_out, sub4_out, input_shape): - with scope("conv_sub4"): - tmp = conv(sub4_out, 128, 3, dilation=2, padding=2) - tmp = bn(tmp) - tmp = tmp + sub2_out - tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, np.ceil(input_shape / 8)) - return tmp - - -def CCF124(sub1_out, sub24_out, input_shape): - tmp = zero_padding(sub24_out, padding=2) - with scope("conv_sub2"): - tmp = conv(tmp, 128, 3, dilation=2) - tmp = bn(tmp) - tmp = tmp + sub1_out - tmp = fluid.layers.relu(tmp) - tmp = interp(tmp, input_shape // 4) - return tmp - - -def resnet(input): - # ICNET backbone: resnet, 默认resnet50 - # end_points: resnet终止层数 - # decode_point: backbone引出分支所在层数 - # resize_point:backbone所在的该层卷积尺寸缩小至1/2 - # dilation_dict: resnet block数及对应的膨胀卷积尺度 - scale = cfg.MODEL.ICNET.DEPTH_MULTIPLIER - layers = cfg.MODEL.ICNET.LAYERS - model = resnet_backbone(scale=scale, layers=layers, stem='icnet') - end_points = 49 - decode_point = 13 - resize_point = 13 - dilation_dict = {2: 2, 3: 4} - data, decode_shortcuts = model.net( - input, - end_points=end_points, - decode_points=decode_point, - resize_points=resize_point, - dilation_dict=dilation_dict) - return data, decode_shortcuts[decode_point] - - -def encoder(data13, data49, input, input_shape): - # ICENT encoder配置 - # sub_net_4:对resnet49层数据进行pyramis_pooling操作 - # sub_net_2:对resnet13层数据进行卷积操作 - # sub_net_1: 对原始尺寸图像进行3次下采样卷积操作 - sub4_out = sub_net_4(data49, input_shape) - sub2_out = sub_net_2(data13) - sub1_out = sub_net_1(input) - return sub1_out, sub2_out, sub4_out - - -def decoder(sub1_out, sub2_out, sub4_out, input_shape): - # ICENT decoder配置 - # CCF: Cascade Feature Fusion 级联特征融合 - sub24_out = CCF24(sub2_out, sub4_out, input_shape) - sub124_out = CCF124(sub1_out, sub24_out, input_shape) - return sub24_out, sub124_out - - -def get_logit(data, num_classes, name="logit"): - param_attr = fluid.ParamAttr( - name=name + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - - with scope(name): - data = conv( - data, - num_classes, - 1, - stride=1, - padding=0, - param_attr=param_attr, - bias_attr=True) - return data - - -def icnet(input, num_classes): - # Backbone resnet: 输入 image_sub2: 图片尺寸缩小至1/2 - # 输出 data49: resnet第49层数据,原始尺寸1/32 - # data13:resnet第13层数据, 原始尺寸1/16 - input_shape = input.shape[2:] - input_shape = np.array(input_shape).astype("float32") - image_sub2 = interp(input, out_shape=np.ceil(input_shape * 0.5)) - data49, data13 = resnet(image_sub2) - - # encoder:输入:input, data13, data49,分别进行下采样,卷积和金字塔pooling操作 - # 输出:分别对应sub1_out, sub2_out, sub4_out - sub1_out, sub2_out, sub4_out = encoder(data13, data49, input, input_shape) - - # decoder: 对编码器三个分支结果进行级联特征融合 - sub24_out, sub124_out = decoder(sub1_out, sub2_out, sub4_out, input_shape) - - # get_logit: 根据类别数决定最后一层卷积输出 - logit124 = get_logit(sub124_out, num_classes, "logit124") - logit4 = get_logit(sub4_out, num_classes, "logit4") - logit24 = get_logit(sub24_out, num_classes, "logit24") - return logit124, logit24, logit4 - - -if __name__ == '__main__': - image_shape = [-1, 3, 320, 320] - image = fluid.data(name='image', shape=image_shape, dtype='float32') - logit = icnet(image, 4) - print("logit:", logit.shape) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/pspnet.py b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/pspnet.py deleted file mode 100644 index 
377bb0e85a..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/pspnet.py +++ /dev/null @@ -1,115 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from models.libs.model_libs import scope, name_scope -from models.libs.model_libs import avg_pool, conv, bn -from models.backbone.resnet import ResNet as resnet_backbone -from utils.config import cfg - - -def get_logit_interp(input, num_classes, out_shape, name="logit"): - # 根据类别数决定最后一层卷积输出, 并插值回原始尺寸 - param_attr = fluid.ParamAttr( - name=name + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - - with scope(name): - logit = conv( - input, - num_classes, - filter_size=1, - param_attr=param_attr, - bias_attr=True, - name=name + '_conv') - logit_interp = fluid.layers.resize_bilinear( - logit, out_shape=out_shape, name=name + '_interp') - return logit_interp - - -def psp_module(input, out_features): - # Pyramid Scene Parsing 金字塔池化模块 - # 输入:backbone输出的特征 - # 输出:对输入进行不同尺度pooling, 卷积操作后插值回原始尺寸,并concat - # 最后进行一个卷积及BN操作 - - cat_layers = [] - sizes = (1, 2, 3, 6) - for size in sizes: - psp_name = "psp" + str(size) - with scope(psp_name): - pool = fluid.layers.adaptive_pool2d( - input, - pool_size=[size, size], - pool_type='avg', - name=psp_name + '_adapool') - data = conv( - pool, - out_features, - filter_size=1, - bias_attr=True, - name=psp_name + '_conv') - data_bn = bn(data, act='relu') - interp = fluid.layers.resize_bilinear( - data_bn, out_shape=input.shape[2:], name=psp_name + '_interp') - cat_layers.append(interp) - cat_layers = [input] + cat_layers[::-1] - cat = fluid.layers.concat(cat_layers, axis=1, name='psp_cat') - - psp_end_name = "psp_end" - with scope(psp_end_name): - data = conv( - cat, - out_features, - filter_size=3, - padding=1, - bias_attr=True, - name=psp_end_name) - out = bn(data, act='relu') - - return out - - -def resnet(input): - # PSPNET backbone: resnet, 默认resnet50 - # end_points: resnet终止层数 - # dilation_dict: resnet block数及对应的膨胀卷积尺度 - scale = cfg.MODEL.PSPNET.DEPTH_MULTIPLIER - layers = cfg.MODEL.PSPNET.LAYERS - end_points = layers - 1 - dilation_dict = {2: 2, 3: 4} - model = resnet_backbone(layers, scale, stem='pspnet') - data, _ = model.net( - input, end_points=end_points, dilation_dict=dilation_dict) - - return data - - -def pspnet(input, num_classes): - # Backbone: ResNet - res = resnet(input) - # PSP模块 - psp = psp_module(res, 512) - dropout = fluid.layers.dropout(psp, dropout_prob=0.1, name="dropout") - # 根据类别数决定最后一层卷积输出, 并插值回原始尺寸 - logit = get_logit_interp(dropout, num_classes, input.shape[2:]) - return logit diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/unet.py 
b/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/unet.py deleted file mode 100644 index 16355de43d..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/models/modeling/unet.py +++ /dev/null @@ -1,135 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import contextlib -import paddle -import paddle.fluid as fluid -from utils.config import cfg -from models.libs.model_libs import scope, name_scope -from models.libs.model_libs import bn, bn_relu, relu -from models.libs.model_libs import conv, max_pool, deconv - - -def double_conv(data, out_ch): - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) - with scope("conv0"): - data = bn_relu( - conv(data, out_ch, 3, stride=1, padding=1, param_attr=param_attr)) - with scope("conv1"): - data = bn_relu( - conv(data, out_ch, 3, stride=1, padding=1, param_attr=param_attr)) - return data - - -def down(data, out_ch): - # 下采样:max_pool + 2个卷积 - with scope("down"): - data = max_pool(data, 2, 2, 0) - data = double_conv(data, out_ch) - return data - - -def up(data, short_cut, out_ch): - # 上采样:data上采样(resize或deconv), 并与short_cut concat - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.XavierInitializer(), - ) - with scope("up"): - if cfg.MODEL.UNET.UPSAMPLE_MODE == 'bilinear': - data = fluid.layers.resize_bilinear(data, short_cut.shape[2:]) - else: - data = deconv( - data, - out_ch // 2, - filter_size=2, - stride=2, - padding=0, - param_attr=param_attr) - data = fluid.layers.concat([data, short_cut], axis=1) - data = double_conv(data, out_ch) - return data - - -def encode(data): - # 编码器设置 - short_cuts = [] - with scope("encode"): - with scope("block1"): - data = double_conv(data, 64) - short_cuts.append(data) - with scope("block2"): - data = down(data, 128) - short_cuts.append(data) - with scope("block3"): - data = down(data, 256) - short_cuts.append(data) - with scope("block4"): - data = down(data, 512) - short_cuts.append(data) - with scope("block5"): - data = down(data, 512) - return data, short_cuts - - -def decode(data, short_cuts): - # 解码器设置,与编码器对称 - with scope("decode"): - with scope("decode1"): - data = up(data, short_cuts[3], 256) - with scope("decode2"): - data = up(data, short_cuts[2], 128) - with scope("decode3"): - data = up(data, short_cuts[1], 64) - with scope("decode4"): - data = up(data, short_cuts[0], 64) - return data - - -def get_logit(data, num_classes): - # 根据类别数设置最后一个卷积层输出 - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - with 
scope("logit"): - data = conv( - data, num_classes, 3, stride=1, padding=1, param_attr=param_attr) - return data - - -def unet(input, num_classes): - # UNET网络配置,对称的编码器解码器 - encode_data, short_cuts = encode(input) - decode_data = decode(encode_data, short_cuts) - logit = get_logit(decode_data, num_classes) - return logit - - -if __name__ == '__main__': - image_shape = [-1, 3, 320, 320] - image = fluid.data(name='image', shape=image_shape, dtype='float32') - logit = unet(image, 4) - print("logit:", logit.shape) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/planB.py b/legacy/contrib/NeurIPS_SN7/pdseg/planB.py deleted file mode 100644 index df75dcb7e8..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/planB.py +++ /dev/null @@ -1,163 +0,0 @@ -import os -import sys -import math - -import cv2 -from PIL import Image -import numpy as np - -from utils.config import cfg -import pdseg.demo_server as ps - -# ###### common configs ###### - -# pre resize -pre_height = None -pre_width = None -# final output size -target_height = 1250 -target_width = 1250 -# stride -height_stride = 1250 -width_stride = 1250 -# padding, always the same as ignore pixel -padding_pixel = 0 -# url -url = "http://10.255.94.19:8000/put_image" - -# ########################### - -# ###### global init ###### -cfg.DATASET.INPUT_IMAGE_NUM = 2 -fetch_list_diff, test_prog_diff, exe_diff = ps.load_model() - -cfg.DATASET.INPUT_IMAGE_NUM = 1 -fetch_list_seg, test_prog_seg, exe_seg = ps.load_model() - -########################### - - -def get_color_map_list(num_classes): - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j, lab = 0, i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - return color_map - - -def compose_img(im_list, rows, cols, save_file): - image = Image.new('P', - (cols * target_width, rows * target_height)) # 创建一个新图 - for y in range(rows): - for x in range(cols): - patch = Image.fromarray(im_list[cols * y + x]) - image.paste(patch, (x * target_width, y * target_height)) - - color_map = get_color_map_list(256) - image.putpalette(color_map) - image.save(save_file) - - -def divide_img(img): - src_im_height = img.shape[0] - src_im_width = img.shape[1] - - ret_imgs = [] - - x1, y1, idx = 0, 0, 0 - while y1 < src_im_height: - y2 = y1 + target_height - while x1 < src_im_width: - x2 = x1 + target_width - img_crop = img[y1:y2, x1:x2] - if y2 > src_im_height or x2 > src_im_width: - pad_bottom = y2 - src_im_height if y2 > src_im_height else 0 - pad_right = x2 - src_im_width if x2 > src_im_width else 0 - img_crop = cv2.copyMakeBorder( - img_crop, - 0, - pad_bottom, - 0, - pad_right, - cv2.BORDER_CONSTANT, - value=padding_pixel) - ret_imgs.append(img_crop) - x1 += width_stride - idx += 1 - x1 = 0 - y1 += height_stride - - return ret_imgs - - -def infer(img1, img2, idx): - if img2 is not None: - cfg.DATASET.INPUT_IMAGE_NUM = 2 - res1, res2, diff = ps.inference(img1, img2, fetch_list_diff, - test_prog_diff, exe_diff) - res1_dict[idx] = res1 - res2_dict[idx] = res2 - diff_dict[idx] = diff - else: - cfg.DATASET.INPUT_IMAGE_NUM = 1 - res = ps.inference(img1, img2, fetch_list_seg, test_prog_seg, exe_seg) - res1_dict[idx] = res - - -def divide_and_infer(img1, img2, save_dir): - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - src_im_height = img1.shape[0] - src_im_width = img1.shape[1] - - cols = math.ceil(src_im_width / target_width) - rows = 
math.ceil(src_im_height / target_height) - nums = cols * rows - - patch1 = divide_img(img1) - patch2 = divide_img(img2) if img2 is not None else [None] * nums - print("divide into %d patch" % nums) - - global res1_dict, res2_dict, diff_dict - res1_dict, res2_dict, diff_dict = {}, {}, {} - for i in range(nums): - infer(patch1[i], patch2[i], i) - - res1_list = [res1_dict[i] for i in range(nums)] - compose_img(res1_list, rows, cols, save_dir + "/res1.png") - if img2 is not None: - res2_list = [res2_dict[i] for i in range(nums)] - compose_img(res2_list, rows, cols, save_dir + "/res2.png") - diff_list = [diff_dict[i] for i in range(nums)] - compose_img(diff_list, rows, cols, save_dir + "/diff.png") - - -def main(im1_file, im2_file, save_dir): - img1 = np.array(Image.open(im1_file)) - img2 = None - if im2_file is not None: - img2 = np.array(Image.open(im2_file)) - - if pre_height and pre_width: - img1 = cv2.resize(img1, (pre_height, pre_width)) - if img2 is not None: - img2 = cv2.resize(img2, (pre_height, pre_width)) - divide_and_infer(img1, img2, save_dir) - - -if __name__ == '__main__': - args = sys.argv - if len(args) == 2: - im1_file = args[1] - im2_file = None - else: - im1_file = args[1] - im2_file = args[2] - - main(im1_file, im2_file, save_dir="result") diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/reader.py b/legacy/contrib/NeurIPS_SN7/pdseg/reader.py deleted file mode 100644 index bee564c7ea..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/reader.py +++ /dev/null @@ -1,491 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import sys -import os -import math -import random -import functools -import io -import time -import codecs - -import numpy as np -import paddle -import paddle.fluid as fluid -import cv2 -from PIL import Image - -import data_aug as aug -from utils.config import cfg -from data_utils import GeneratorEnqueuer -from models.model_builder import ModelPhase -import copy - - -def pil_imread(file_path): - """read pseudo-color label""" - im = Image.open(file_path) - return np.asarray(im) - - -def cv2_imread(file_path, flag=cv2.IMREAD_COLOR): - # resolve cv2.imread open Chinese file path issues on Windows Platform. 
- return cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), flag) - - -class SegDataset(object): - def __init__(self, - file_list, - data_dir, - shuffle=False, - mode=ModelPhase.TRAIN): - self.mode = mode - self.shuffle = shuffle - self.data_dir = data_dir - - self.shuffle_seed = 0 - # NOTE: Please ensure file list was save in UTF-8 coding format - with codecs.open(file_list, 'r', 'utf-8') as flist: - self.lines = [line.strip() for line in flist] - self.all_lines = copy.deepcopy(self.lines) - if shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - elif shuffle: - np.random.shuffle(self.lines) - - def generator(self): - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // cfg.NUM_TRAINERS - self.lines = self.all_lines[num_lines * cfg.TRAINER_ID:num_lines * - (cfg.TRAINER_ID + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - for line in self.lines: - yield self.process_image(line, self.data_dir, self.mode) - - def sharding_generator(self, pid=0, num_processes=1): - """ - Use line id as shard key for multiprocess io - It's a normal generator if pid=0, num_processes=1 - """ - for index, line in enumerate(self.lines): - # Use index and pid to shard file list - if index % num_processes == pid: - yield self.process_image(line, self.data_dir, self.mode) - - def batch_reader(self, batch_size): - br = self.batch(self.reader, batch_size) - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - for batch in br: - yield batch[0], batch[1], batch[2] - else: - for batch in br: - yield batch[0], batch[1], batch[2], batch[3] - - def multiprocess_generator(self, max_queue_size=32, num_processes=8): - # Re-shuffle file list - if self.shuffle and cfg.NUM_TRAINERS > 1: - np.random.RandomState(self.shuffle_seed).shuffle(self.all_lines) - num_lines = len(self.all_lines) // cfg.NUM_TRAINERS - self.lines = self.all_lines[num_lines * cfg.TRAINER_ID:num_lines * - (cfg.TRAINER_ID + 1)] - self.shuffle_seed += 1 - elif self.shuffle: - np.random.shuffle(self.lines) - - # Create multiple sharding generators according to num_processes for multiple processes - generators = [] - for pid in range(num_processes): - generators.append(self.sharding_generator(pid, num_processes)) - - try: - enqueuer = GeneratorEnqueuer(generators) - enqueuer.start(max_queue_size=max_queue_size, workers=num_processes) - while True: - generator_out = None - while enqueuer.is_running(): - if not enqueuer.queue.empty(): - generator_out = enqueuer.queue.get(timeout=5) - break - else: - time.sleep(0.01) - if generator_out is None: - break - yield generator_out - finally: - if enqueuer is not None: - enqueuer.stop() - - def batch(self, reader, batch_size, is_test=False, drop_last=False): - def batch_reader(is_test=False, drop_last=drop_last): - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - if is_test: - imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], [] - for img, grt, img_name, valid_shape, org_shape in reader(): - imgs.append(img) - grts.append(grt) - img_names.append(img_name) - valid_shapes.append(valid_shape) - org_shapes.append(org_shape) - if len(imgs) == batch_size: - yield np.array(imgs), np.array( - grts), img_names, np.array( - valid_shapes), np.array(org_shapes) - imgs, grts, img_names, valid_shapes, org_shapes = [], [], [], [], [] - - if not drop_last and len(imgs) > 0: - yield np.array(imgs), np.array( - grts), img_names, np.array(valid_shapes), np.array( - 
org_shapes) - else: - imgs, labs, ignore = [], [], [] - bs = 0 - for img, lab, ig in reader(): - imgs.append(img) - labs.append(lab) - ignore.append(ig) - bs += 1 - if bs == batch_size: - yield np.array(imgs), np.array(labs), np.array( - ignore) - bs = 0 - imgs, labs, ignore = [], [], [] - - if not drop_last and bs > 0: - yield np.array(imgs), np.array(labs), np.array(ignore) - else: - if is_test: - img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes = [], [], [], [], [], [], [] - for img1, img2, grt, img1_name, img2_name, valid_shape, org_shape in reader( - ): - img1s.append(img1) - img2s.append(img2) - grts.append(grt) - img1_names.append(img1_name) - img2_names.append(img2_name) - valid_shapes.append(valid_shape) - org_shapes.append(org_shape) - if len(img1s) == batch_size: - yield np.array(img1s), np.array(img2s), np.array(grts), \ - img1_names, img2_names, np.array(valid_shapes), np.array(org_shapes) - img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes = [], [], [], [], [], [], [] - - if not drop_last and len(img1s) > 0: - yield np.array(img1s), np.array(img2s), np.array(grts), \ - img1_names, img2_names, np.array(valid_shapes), np.array(org_shapes) - else: - img1s, img2s, labs, ignore = [], [], [], [] - bs = 0 - for img1, img2, lab, ig in reader(): - img1s.append(img1) - img2s.append(img2) - labs.append(lab) - ignore.append(ig) - bs += 1 - if bs == batch_size: - yield np.array(img1s), np.array(img2s), np.array( - labs), np.array(ignore) - bs = 0 - img1s, img2s, labs, ignore = [], [], [], [] - - if not drop_last and bs > 0: - yield np.array(img1s), np.array(img2s), np.array( - labs), np.array(ignore) - - return batch_reader(is_test, drop_last) - - def load_image(self, line, src_dir, mode=ModelPhase.TRAIN): - # original image cv2.imread flag setting - cv2_imread_flag = cv2.IMREAD_COLOR - if cfg.DATASET.IMAGE_TYPE == "rgba": - # If use RBGA 4 channel ImageType, use IMREAD_UNCHANGED flags to - # reserver alpha channel - cv2_imread_flag = cv2.IMREAD_UNCHANGED - - parts = line.strip().split(cfg.DATASET.SEPARATOR) - - if len(parts) == 1: - img1_name, img2_name, grt1_name, grt2_name = parts[ - 0], None, None, None - elif len(parts) == 2: - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - img1_name, img2_name, grt1_name, grt2_name = parts[ - 0], None, parts[1], None - else: - img1_name, img2_name, grt1_name, grt2_name = parts[0], parts[ - 1], None, None - elif len(parts) == 3: - img1_name, img2_name, grt1_name, grt2_name = parts[0], parts[ - 1], parts[2], None - elif len(parts) == 4: - img1_name, img2_name, grt1_name, grt2_name = parts[0], parts[ - 1], parts[2], parts[3] - else: - raise Exception("File list format incorrect! 
It should be" - " image_name{}label_name\\n".format( - cfg.DATASET.SEPARATOR)) - - # read input image 1 - img1_path = os.path.join(src_dir, img1_name) - img1 = cv2_imread(img1_path, cv2_imread_flag) - if img1 is None: - raise Exception("Empty image, src_dir: {}, img: {}".format( - src_dir, img1_path)) - if len(img1.shape) < 3: - img1 = cv2.cvtColor(img1, cv2.COLOR_GRAY2BGR) - - # read input image 2 - if img2_name is not None: - img2_path = os.path.join(src_dir, img2_name) - img2 = cv2_imread(img2_path, cv2_imread_flag) - if img2 is None: - raise Exception("Empty image, src_dir: {}, img: {}".format( - src_dir, img2_path)) - if len(img2.shape) < 3: - img2 = cv2.cvtColor(img2, cv2.COLOR_GRAY2BGR) - if img1.shape != img2.shape: - raise Exception( - "source img1 and source img2 must has the same size") - else: - img2 = None - - # read input label image - if grt1_name is not None: - grt1_path = os.path.join(src_dir, grt1_name) - grt1 = pil_imread(grt1_path) - if grt1 is None: - raise Exception("Empty image, src_dir: {}, label: {}".format( - src_dir, grt1_path)) - grt1_height = grt1.shape[0] - grt1_width = grt1.shape[1] - img1_height = img1.shape[0] - img1_width = img1.shape[1] - if img1_height != grt1_height or img1_width != grt1_width: - raise Exception( - "source img and label img must has the same size") - else: - grt1 = None - - if grt2_name is not None: - grt2_path = os.path.join(src_dir, grt2_name) - grt2 = pil_imread(grt2_path) - if grt2 is None: - raise Exception("Empty image, src_dir: {}, label: {}".format( - src_dir, grt2_path)) - grt2_height = grt2.shape[0] - grt2_width = grt2.shape[1] - img2_height = img2.shape[0] - img2_width = img2.shape[1] - if img2_height != grt2_height or img2_width != grt2_width: - raise Exception( - "source img and label img must has the same size") - else: - grt2 = None - - img_channels = img1.shape[2] - if img_channels < 3: - raise Exception("PaddleSeg only supports gray, rgb or rgba image") - if img_channels != cfg.DATASET.DATA_DIM: - raise Exception( - "Input image channel({}) is not match cfg.DATASET.DATA_DIM({}), img_name={}" - .format(img_channels, cfg.DATASET.DATADIM, img1_name)) - if img_channels != len(cfg.MEAN): - raise Exception( - "img name {}, img chns {} mean size {}, size unequal".format( - img1_name, img_channels, len(cfg.MEAN))) - if img_channels != len(cfg.STD): - raise Exception( - "img name {}, img chns {} std size {}, size unequal".format( - img1_name, img_channels, len(cfg.STD))) - - return img1, img2, grt1, grt2, img1_name, img2_name, grt1_name, grt2_name - - def normalize_image(self, img): - """ 像素归一化后减均值除方差 """ - img = img.transpose((2, 0, 1)).astype('float32') / 255.0 - img_mean = np.array(cfg.MEAN).reshape((len(cfg.MEAN), 1, 1)) - img_std = np.array(cfg.STD).reshape((len(cfg.STD), 1, 1)) - if img.shape[0] > 3: - tile_times = img.shape[0] // 3 - img_mean = np.tile(img_mean, (tile_times, 1, 1)) - img_std = np.tile(img_std, (tile_times, 1, 1)) - img -= img_mean - img /= img_std - return img - - def test_aug(self, img1): - ret = img1 - for ops in cfg.TEST.TEST_AUG_FLIP_OPS: - if ops[0] == 'h': - ret = np.concatenate((ret, img1[::-1, :, :]), axis=2) - elif ops[0] == 'v': - ret = np.concatenate((ret, img1[:, ::-1, :]), axis=2) - elif ops[0] == 'm': - ret = np.concatenate((ret, np.transpose(img1, (1, 0, 2))), - axis=2) - else: - ret = np.concatenate( - (ret, np.transpose(np.rot90(img1, k=2), (1, 0, 2))), axis=2) - - for angle in cfg.TEST.TEST_AUG_ROTATE_OPS: - ret = np.concatenate((ret, np.rot90(img1, k=angle // 90)), axis=2) - - return 
ret - - def process_image(self, line, data_dir, mode): - """ process_image """ - img1, img2, grt1, grt2, img1_name, img2_name, grt1_name, grt2_name = self.load_image( - line, data_dir, mode=mode) - grt1 = grt1 + 1 if grt1 is not None else None - if mode == ModelPhase.TRAIN: - img1, img2, grt1, grt2 = aug.resize(img1, img2, grt1, grt2, mode) - img1, img2, grt1, grt2 = aug.rand_crop( - img1, img2, grt1, grt2, mode=mode) - if cfg.AUG.RICH_CROP.ENABLE: - if cfg.AUG.RICH_CROP.BLUR: - if cfg.AUG.RICH_CROP.BLUR_RATIO <= 0: - n = 0 - elif cfg.AUG.RICH_CROP.BLUR_RATIO >= 1: - n = 1 - else: - n = int(1.0 / cfg.AUG.RICH_CROP.BLUR_RATIO) - if n > 0: - if np.random.randint(0, n) == 0: - radius = np.random.randint(3, 10) - if radius % 2 != 1: - radius = radius + 1 - if radius > 9: - radius = 9 - img1 = cv2.GaussianBlur(img1, (radius, radius), 0, - 0) - if img2 is not None: - img2 = cv2.GaussianBlur(img2, (radius, radius), - 0, 0) - - img1, img2, grt1, grt2 = aug.random_rotation( - img1, - img2, - grt1, - grt2, - rich_crop_max_rotation=cfg.AUG.RICH_CROP.MAX_ROTATION, - mean_value=cfg.DATASET.PADDING_VALUE) - - img1, img2, grt1, grt2 = aug.rand_scale_aspect( - img1, - img2, - grt1, - grt2, - rich_crop_min_scale=cfg.AUG.RICH_CROP.MIN_AREA_RATIO, - rich_crop_aspect_ratio=cfg.AUG.RICH_CROP.ASPECT_RATIO) - - img1, img2 = aug.hsv_color_jitter( - img1, - img2, - brightness_jitter_ratio=cfg.AUG.RICH_CROP. - BRIGHTNESS_JITTER_RATIO, - saturation_jitter_ratio=cfg.AUG.RICH_CROP. - SATURATION_JITTER_RATIO, - contrast_jitter_ratio=cfg.AUG.RICH_CROP. - CONTRAST_JITTER_RATIO) - - if cfg.AUG.RANDOM_ROTATION90: - rot_k = np.random.randint(0, 4) - img1 = np.rot90(img1, k=rot_k) - img2 = np.rot90(img2, k=rot_k) if img2 is not None else None - grt1 = np.rot90(grt1, k=rot_k) - grt2 = np.rot90(grt2, k=rot_k) if grt2 is not None else None - - if cfg.AUG.FLIP: - if cfg.AUG.FLIP_RATIO <= 0: - n = 0 - elif cfg.AUG.FLIP_RATIO >= 1: - n = 1 - else: - n = int(1.0 / cfg.AUG.FLIP_RATIO) - if n > 0: - if np.random.randint(0, n) == 0: - img1 = img1[::-1, :, :] - img2 = img2[::-1, :, :] if img2 is not None else None - grt1 = grt1[::-1, :] - grt2 = grt2[::-1, :] if grt2 is not None else None - - if cfg.AUG.MIRROR: - if np.random.randint(0, 2) == 1: - img1 = img1[:, ::-1, :] - img2 = img2[:, ::-1, :] if img2 is not None else None - grt1 = grt1[:, ::-1] - grt2 = grt2[:, ::-1] if grt2 is not None else None - - elif ModelPhase.is_eval(mode): - img1, img2, grt1, grt2 = aug.resize( - img1, img2, grt1, grt2, mode=mode) - img1, img2, grt1, grt2 = aug.rand_crop( - img1, img2, grt1, grt2, mode=mode) - if cfg.TEST.TEST_AUG: - img1 = self.test_aug(img1) - img2 = self.test_aug(img2) if img2 is not None else None - - elif ModelPhase.is_visual(mode): - org_shape = [img1.shape[0], img1.shape[1]] - img1, img2, grt1, grt2 = aug.resize( - img1, img2, grt1, grt2, mode=mode) - valid_shape = [img1.shape[0], img1.shape[1]] - img1, img2, grt1, grt2 = aug.rand_crop( - img1, img2, grt1, grt2, mode=mode) - else: - raise ValueError("Dataset mode={} Error!".format(mode)) - - # Normalize image - img1 = self.normalize_image(img1) - img2 = self.normalize_image(img2) if img2 is not None else None - - if grt2 is not None: - grt = grt1 * cfg.DATASET.NUM_CLASSES + grt2 - - unchange_idx = np.where((grt1 - grt2) == 0) - grt[unchange_idx] = 0 - if cfg.DATASET.NUM_CLASSES == 2: - grt[np.where(grt != 0)] = 1 - - ignore_idx = np.where((grt1 == cfg.DATASET.IGNORE_INDEX) - | (grt2 == cfg.DATASET.IGNORE_INDEX)) - grt[ignore_idx] = cfg.DATASET.IGNORE_INDEX - else: - grt = grt1 - - if 
ModelPhase.is_train(mode) or ModelPhase.is_eval(mode): - grt = np.expand_dims(np.array(grt).astype('int32'), axis=0) - ignore = (grt != cfg.DATASET.IGNORE_INDEX).astype('int32') - - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - if ModelPhase.is_train(mode): - return (img1, grt, ignore) - elif ModelPhase.is_eval(mode): - return (img1, grt, ignore) - elif ModelPhase.is_visual(mode): - return (img1, grt, img1_name, valid_shape, org_shape) - else: - if ModelPhase.is_train(mode): - return (img1, img2, grt, ignore) - elif ModelPhase.is_eval(mode): - return (img1, img2, grt, ignore) - elif ModelPhase.is_visual(mode): - return (img1, img2, grt, img1_name, img2_name, valid_shape, - org_shape) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/solver.py b/legacy/contrib/NeurIPS_SN7/pdseg/solver.py deleted file mode 100644 index bd67db0c37..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/solver.py +++ /dev/null @@ -1,162 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle.fluid as fluid -import numpy as np -import importlib -from utils.config import cfg -try: - from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecison, decorate, AutoMixedPrecisionLists -except: - from paddle.fluid.contrib.mixed_precision.decorator import OptimizerWithMixedPrecision, decorate, AutoMixedPrecisionLists - - -class Solver(object): - def __init__(self, main_prog, start_prog): - total_images = cfg.DATASET.TRAIN_TOTAL_IMAGES - self.weight_decay = cfg.SOLVER.WEIGHT_DECAY - self.momentum = cfg.SOLVER.MOMENTUM - self.momentum2 = cfg.SOLVER.MOMENTUM2 - self.step_per_epoch = total_images // cfg.BATCH_SIZE - if total_images % cfg.BATCH_SIZE != 0: - self.step_per_epoch += 1 - self.total_step = cfg.SOLVER.NUM_EPOCHS * self.step_per_epoch - self.main_prog = main_prog - self.start_prog = start_prog - self.warmup_step = cfg.SOLVER.LR_WARMUP_STEPS if cfg.SOLVER.LR_WARMUP else -1 - self.decay_step = self.total_step - self.warmup_step - self.decay_epochs = cfg.SOLVER.NUM_EPOCHS - self.warmup_step / self.step_per_epoch - - def lr_warmup(self, learning_rate, start_lr, end_lr): - linear_step = end_lr - start_lr - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate_warmup") - - global_step = fluid.layers.learning_rate_scheduler._decay_step_counter() - warmup_counter = fluid.layers.autoincreased_step_counter( - counter_name='@LR_DECAY_COUNTER_WARMUP_IN_SEG@', begin=1, step=1) - global_counter = fluid.default_main_program().global_block( - ).vars['@LR_DECAY_COUNTER@'] - warmup_counter = fluid.layers.cast(warmup_counter, 'float32') - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(warmup_counter <= self.warmup_step): - decayed_lr = start_lr + linear_step * ( - warmup_counter / self.warmup_step) - fluid.layers.tensor.assign(decayed_lr, lr) - # hold the global_step to 0 during the warm-up phase - 
fluid.layers.increment(global_counter, value=-1) - with switch.default(): - fluid.layers.tensor.assign(learning_rate, lr) - return lr - - def piecewise_decay(self): - gamma = cfg.SOLVER.GAMMA - bd = [self.step_per_epoch * e for e in cfg.SOLVER.DECAY_EPOCH] - lr = [cfg.SOLVER.LR * (gamma**i) for i in range(len(bd) + 1)] - decayed_lr = fluid.layers.piecewise_decay(boundaries=bd, values=lr) - return decayed_lr - - def poly_decay(self): - power = cfg.SOLVER.POWER - decayed_lr = fluid.layers.polynomial_decay( - cfg.SOLVER.LR, self.decay_step, end_learning_rate=0, power=power) - return decayed_lr - - def cosine_decay(self): - decayed_lr = fluid.layers.cosine_decay( - cfg.SOLVER.LR, self.step_per_epoch, self.decay_epochs) - return decayed_lr - - def get_lr(self, lr_policy): - if lr_policy.lower() == 'poly': - decayed_lr = self.poly_decay() - elif lr_policy.lower() == 'piecewise': - decayed_lr = self.piecewise_decay() - elif lr_policy.lower() == 'cosine': - decayed_lr = self.cosine_decay() - else: - raise Exception( - "unsupport learning decay policy! only support poly,piecewise,cosine" - ) - - decayed_lr = self.lr_warmup(decayed_lr, 0, cfg.SOLVER.LR) - return decayed_lr - - def sgd_optimizer(self, lr_policy, loss): - decayed_lr = self.get_lr(lr_policy) - optimizer = fluid.optimizer.Momentum( - learning_rate=decayed_lr, - momentum=self.momentum, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=self.weight_decay), - ) - if cfg.MODEL.FP16: - if cfg.MODEL.MODEL_NAME in ["pspnet"]: - custom_black_list = {"pool2d"} - else: - custom_black_list = {} - amp_lists = AutoMixedPrecisionLists( - custom_black_list=custom_black_list) - assert isinstance(cfg.MODEL.SCALE_LOSS, float) or isinstance(cfg.MODEL.SCALE_LOSS, str), \ - "data type of MODEL.SCALE_LOSS must be float or str" - if isinstance(cfg.MODEL.SCALE_LOSS, float): - optimizer = decorate( - optimizer, - amp_lists=amp_lists, - init_loss_scaling=cfg.MODEL.SCALE_LOSS, - use_dynamic_loss_scaling=False) - else: - assert cfg.MODEL.SCALE_LOSS.lower() in [ - 'dynamic' - ], "if MODEL.SCALE_LOSS is a string,\ - must be set as 'DYNAMIC'!" - - optimizer = decorate( - optimizer, - amp_lists=amp_lists, - use_dynamic_loss_scaling=True) - - optimizer.minimize(loss) - return decayed_lr - - def adam_optimizer(self, lr_policy, loss): - decayed_lr = self.get_lr(lr_policy) - optimizer = fluid.optimizer.Adam( - learning_rate=decayed_lr, - beta1=self.momentum, - beta2=self.momentum2, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=self.weight_decay), - ) - optimizer.minimize(loss) - return decayed_lr - - def optimise(self, loss): - lr_policy = cfg.SOLVER.LR_POLICY - opt = cfg.SOLVER.OPTIMIZER - - if opt.lower() == 'adam': - return self.adam_optimizer(lr_policy, loss) - elif opt.lower() == 'sgd': - return self.sgd_optimizer(lr_policy, loss) - else: - raise Exception( - "unsupport optimizer solver, only support adam and sgd") diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/tools/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/tools/__init__.py deleted file mode 100644 index ba1baeb1ac..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/tools/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/tools/create_dataset_list.py b/legacy/contrib/NeurIPS_SN7/pdseg/tools/create_dataset_list.py deleted file mode 100644 index 2067abcb52..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/tools/create_dataset_list.py +++ /dev/null @@ -1,151 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os.path -import argparse -import warnings - - -def parse_args(): - parser = argparse.ArgumentParser( - description= - 'PaddleSeg generate file list on cityscapes or your customized dataset.' - ) - parser.add_argument('dataset_root', help='dataset root directory', type=str) - parser.add_argument( - '--type', - help='dataset type: \n' - '- cityscapes \n' - '- custom(default)', - default="custom", - type=str) - parser.add_argument( - '--separator', - dest='separator', - help='file list separator', - default="|", - type=str) - parser.add_argument( - '--folder', - help='the folder names of images and labels', - type=str, - nargs=2, - default=['images', 'annotations']) - parser.add_argument( - '--second_folder', - help= - 'the second-level folder names of train set, validation set, test set', - type=str, - nargs='*', - default=['train', 'val', 'test']) - parser.add_argument( - '--format', - help='data format of images and labels, e.g. 
jpg or png.', - type=str, - nargs=2, - default=['jpg', 'png']) - parser.add_argument( - '--postfix', - help='postfix of images or labels', - type=str, - nargs=2, - default=['', '']) - - return parser.parse_args() - - -def cityscape_cfg(args): - args.postfix = ['_leftImg8bit', '_gtFine_labelTrainIds'] - - args.folder = ['leftImg8bit', 'gtFine'] - - args.format = ['png', 'png'] - - -def get_files(image_or_label, dataset_split, args): - dataset_root = args.dataset_root - postfix = args.postfix - format = args.format - folder = args.folder - - pattern = '*%s.%s' % (postfix[image_or_label], format[image_or_label]) - - search_files = os.path.join(dataset_root, folder[image_or_label], - dataset_split, pattern) - search_files2 = os.path.join(dataset_root, folder[image_or_label], - dataset_split, "*", pattern) # 包含子目录 - search_files3 = os.path.join(dataset_root, folder[image_or_label], - dataset_split, "*", "*", pattern) # 包含三级目录 - - filenames = glob.glob(search_files) - filenames2 = glob.glob(search_files2) - filenames3 = glob.glob(search_files3) - - filenames = filenames + filenames2 + filenames3 - - return sorted(filenames) - - -def generate_list(args): - dataset_root = args.dataset_root - separator = args.separator - - for dataset_split in args.second_folder: - print("Creating {}.txt...".format(dataset_split)) - image_files = get_files(0, dataset_split, args) - label_files = get_files(1, dataset_split, args) - if not image_files: - img_dir = os.path.join(dataset_root, args.folder[0], dataset_split) - warnings.warn("No images in {} !!!".format(img_dir)) - num_images = len(image_files) - - if not label_files: - label_dir = os.path.join(dataset_root, args.folder[1], - dataset_split) - warnings.warn("No labels in {} !!!".format(label_dir)) - num_label = len(label_files) - - if num_images != num_label and num_label > 0: - raise Exception( - "Number of images = {} number of labels = {} \n" - "Either number of images is equal to number of labels, " - "or number of labels is equal to 0.\n" - "Please check your dataset!".format(num_images, num_label)) - - file_list = os.path.join(dataset_root, dataset_split + '.txt') - with open(file_list, "w") as f: - for item in range(num_images): - left = image_files[item].replace(dataset_root, '') - if left[0] == os.path.sep: - left = left.lstrip(os.path.sep) - - try: - right = label_files[item].replace(dataset_root, '') - if right[0] == os.path.sep: - right = right.lstrip(os.path.sep) - line = left + separator + right + '\n' - except: - line = left + '\n' - - f.write(line) - print(line) - - -if __name__ == '__main__': - args = parse_args() - if args.type == 'cityscapes': - cityscape_cfg(args) - generate_list(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/tools/gray2pseudo_color.py b/legacy/contrib/NeurIPS_SN7/pdseg/tools/gray2pseudo_color.py deleted file mode 100644 index 3120678420..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/tools/gray2pseudo_color.py +++ /dev/null @@ -1,121 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import os -import os.path as osp -import sys -import numpy as np -from PIL import Image - - -def parse_args(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - 'dir_or_file', help='input gray label directory or file list path') - parser.add_argument('output_dir', help='output colorful label directory') - parser.add_argument('--dataset_dir', help='dataset directory') - parser.add_argument('--file_separator', help='file list separator') - return parser.parse_args() - - -def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. - Args: - num_classes: Number of classes - Returns: - The color map - """ - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - - return color_map - - -def gray2pseudo_color(args): - """将灰度标注图片转换为伪彩色图片""" - input = args.dir_or_file - output_dir = args.output_dir - if not osp.exists(output_dir): - os.makedirs(output_dir) - print('Creating colorful label directory:', output_dir) - - color_map = get_color_map_list(256) - if os.path.isdir(input): - for fpath, dirs, fs in os.walk(input): - for f in fs: - try: - grt_path = osp.join(fpath, f) - _output_dir = fpath.replace(input, '') - _output_dir = _output_dir.lstrip(os.path.sep) - - im = Image.open(grt_path) - lbl = np.asarray(im) - - lbl_pil = Image.fromarray(lbl.astype(np.uint8), mode='P') - lbl_pil.putpalette(color_map) - - real_dir = osp.join(output_dir, _output_dir) - if not osp.exists(real_dir): - os.makedirs(real_dir) - new_grt_path = osp.join(real_dir, f) - - lbl_pil.save(new_grt_path) - print('New label path:', new_grt_path) - except: - continue - elif os.path.isfile(input): - if args.dataset_dir is None or args.file_separator is None: - print('No dataset_dir or file_separator input!') - sys.exit() - with open(input) as f: - for line in f: - parts = line.strip().split(args.file_separator) - grt_name = parts[1] - grt_path = os.path.join(args.dataset_dir, grt_name) - - im = Image.open(grt_path) - lbl = np.asarray(im) - - lbl_pil = Image.fromarray(lbl.astype(np.uint8), mode='P') - lbl_pil.putpalette(color_map) - - grt_dir, _ = osp.split(grt_name) - new_dir = osp.join(output_dir, grt_dir) - if not osp.exists(new_dir): - os.makedirs(new_dir) - new_grt_path = osp.join(output_dir, grt_name) - - lbl_pil.save(new_grt_path) - print('New label path:', new_grt_path) - else: - print('It\'s neither a dir nor a file') - - -if __name__ == '__main__': - args = parse_args() - gray2pseudo_color(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/tools/jingling2seg.py b/legacy/contrib/NeurIPS_SN7/pdseg/tools/jingling2seg.py deleted file mode 100644 index f9f240d190..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/tools/jingling2seg.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import glob -import json -import os -import os.path as osp -import numpy as np -import PIL.Image - -from gray2pseudo_color import get_color_map_list -from labelme2seg import shape2label - - -def parse_args(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('input_dir', help='input annotated directory') - return parser.parse_args() - - -def main(args): - output_dir = osp.join(args.input_dir, 'annotations') - if not osp.exists(output_dir): - os.makedirs(output_dir) - print('Creating annotations directory:', output_dir) - - # get the all class names for the given dataset - class_names = ['_background_'] - for label_file in glob.glob(osp.join(args.input_dir, '*.json')): - with open(label_file) as f: - data = json.load(f) - if data['outputs']: - for output in data['outputs']['object']: - name = output['name'] - cls_name = name - if not cls_name in class_names: - class_names.append(cls_name) - - class_name_to_id = {} - for i, class_name in enumerate(class_names): - class_id = i # starts with 0 - class_name_to_id[class_name] = class_id - if class_id == 0: - assert class_name == '_background_' - class_names = tuple(class_names) - print('class_names:', class_names) - - out_class_names_file = osp.join(args.input_dir, 'class_names.txt') - with open(out_class_names_file, 'w') as f: - f.writelines('\n'.join(class_names)) - print('Saved class_names:', out_class_names_file) - - color_map = get_color_map_list(256) - - for label_file in glob.glob(osp.join(args.input_dir, '*.json')): - print('Generating dataset from:', label_file) - with open(label_file) as f: - base = osp.splitext(osp.basename(label_file))[0] - out_png_file = osp.join(output_dir, base + '.png') - - data = json.load(f) - - data_shapes = [] - if data['outputs']: - for output in data['outputs']['object']: - if 'polygon' in output.keys(): - polygon = output['polygon'] - name = output['name'] - - # convert jingling format to labelme format - points = [] - for i in range(1, int(len(polygon) / 2) + 1): - points.append( - [polygon['x' + str(i)], polygon['y' + str(i)]]) - shape = { - 'label': name, - 'points': points, - 'shape_type': 'polygon' - } - data_shapes.append(shape) - - if 'size' not in data: - continue - data_size = data['size'] - img_shape = (data_size['height'], data_size['width'], - data_size['depth']) - - lbl = shape2label( - img_size=img_shape, - shapes=data_shapes, - class_name_mapping=class_name_to_id, - ) - - if osp.splitext(out_png_file)[1] != '.png': - out_png_file += '.png' - # Assume label ranges [0, 255] for uint8, - if lbl.min() >= 0 and lbl.max() <= 255: - lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P') - lbl_pil.putpalette(color_map) - lbl_pil.save(out_png_file) - else: - raise ValueError( - '[%s] Cannot save the pixel-wise class label as PNG. ' - 'Please consider using the .npy format.' 
% out_png_file) - - -if __name__ == '__main__': - args = parse_args() - main(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/tools/labelme2seg.py b/legacy/contrib/NeurIPS_SN7/pdseg/tools/labelme2seg.py deleted file mode 100644 index 4770e20842..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/tools/labelme2seg.py +++ /dev/null @@ -1,125 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import argparse -import glob -import math -import json -import os -import os.path as osp -import numpy as np -import PIL.Image -import PIL.ImageDraw -import cv2 - -from gray2pseudo_color import get_color_map_list - - -def parse_args(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('input_dir', help='input annotated directory') - return parser.parse_args() - - -def main(args): - output_dir = osp.join(args.input_dir, 'annotations') - if not osp.exists(output_dir): - os.makedirs(output_dir) - print('Creating annotations directory:', output_dir) - - # get the all class names for the given dataset - class_names = ['_background_'] - for label_file in glob.glob(osp.join(args.input_dir, '*.json')): - with open(label_file) as f: - data = json.load(f) - for shape in data['shapes']: - label = shape['label'] - cls_name = label - if not cls_name in class_names: - class_names.append(cls_name) - - class_name_to_id = {} - for i, class_name in enumerate(class_names): - class_id = i # starts with 0 - class_name_to_id[class_name] = class_id - if class_id == 0: - assert class_name == '_background_' - class_names = tuple(class_names) - print('class_names:', class_names) - - out_class_names_file = osp.join(args.input_dir, 'class_names.txt') - with open(out_class_names_file, 'w') as f: - f.writelines('\n'.join(class_names)) - print('Saved class_names:', out_class_names_file) - - color_map = get_color_map_list(256) - - for label_file in glob.glob(osp.join(args.input_dir, '*.json')): - print('Generating dataset from:', label_file) - with open(label_file) as f: - base = osp.splitext(osp.basename(label_file))[0] - out_png_file = osp.join(output_dir, base + '.png') - - data = json.load(f) - - img_file = osp.join(osp.dirname(label_file), data['imagePath']) - img = np.asarray(cv2.imread(img_file)) - - lbl = shape2label( - img_size=img.shape, - shapes=data['shapes'], - class_name_mapping=class_name_to_id, - ) - - if osp.splitext(out_png_file)[1] != '.png': - out_png_file += '.png' - # Assume label ranges [0, 255] for uint8, - if lbl.min() >= 0 and lbl.max() <= 255: - lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P') - lbl_pil.putpalette(color_map) - lbl_pil.save(out_png_file) - else: - raise ValueError( - '[%s] Cannot save the pixel-wise class label as PNG. ' - 'Please consider using the .npy format.' 
% out_png_file) - - -def shape2mask(img_size, points): - label_mask = PIL.Image.fromarray(np.zeros(img_size[:2], dtype=np.uint8)) - image_draw = PIL.ImageDraw.Draw(label_mask) - points_list = [tuple(point) for point in points] - assert len(points_list) > 2, 'Polygon must have points more than 2' - image_draw.polygon(xy=points_list, outline=1, fill=1) - return np.array(label_mask, dtype=bool) - - -def shape2label(img_size, shapes, class_name_mapping): - label = np.zeros(img_size[:2], dtype=np.int32) - for shape in shapes: - points = shape['points'] - class_name = shape['label'] - shape_type = shape.get('shape_type', None) - class_id = class_name_mapping[class_name] - label_mask = shape2mask(img_size[:2], points) - label[label_mask] = class_id - return label - - -if __name__ == '__main__': - args = parse_args() - main(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/train.py b/legacy/contrib/NeurIPS_SN7/pdseg/train.py deleted file mode 100644 index b75341d258..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/train.py +++ /dev/null @@ -1,472 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys -import argparse -import pprint -import random -import shutil - -import paddle -import numpy as np -import paddle.fluid as fluid -from paddle.fluid import profiler - -from utils.config import cfg -from utils.timer import Timer, calculate_eta -from metrics import ConfusionMatrix -from reader import SegDataset -from models.model_builder import build_model -from models.model_builder import ModelPhase -from eval import evaluate -from vis import visualize -from utils import dist_utils -from utils.load_model_utils import load_pretrained_weights - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddleSeg training') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', - dest='use_gpu', - help='Use gpu or cpu', - action='store_true', - default=False) - parser.add_argument( - '--use_mpio', - dest='use_mpio', - help='Use multiprocess I/O or not', - action='store_true', - default=False) - parser.add_argument( - '--log_steps', - dest='log_steps', - help='Display logging information at every log_steps', - default=10, - type=int) - parser.add_argument( - '--debug', - dest='debug', - help='debug mode, display detail information of training', - action='store_true') - parser.add_argument( - '--use_vdl', - dest='use_vdl', - help='whether to record the data during training to VisualDL', - action='store_true') - parser.add_argument( - '--vdl_log_dir', - dest='vdl_log_dir', - help='VisualDL logging directory', - default=None, - type=str) - 
parser.add_argument( - '--do_eval', - dest='do_eval', - help='Evaluation models result on every new checkpoint', - action='store_true') - parser.add_argument( - 'opts', - help='See utils/config.py for all options', - default=None, - nargs=argparse.REMAINDER) - parser.add_argument( - '--enable_ce', - dest='enable_ce', - help='If set True, enable continuous evaluation job.' - 'This flag is only used for internal test.', - action='store_true') - - # NOTE: This for benchmark - parser.add_argument( - '--is_profiler', - help='the profiler switch.(used for benchmark)', - default=0, - type=int) - parser.add_argument( - '--profiler_path', - help='the profiler output file path.(used for benchmark)', - default='./seg.profiler', - type=str) - return parser.parse_args() - - -def save_checkpoint(program, ckpt_name): - """ - Save checkpoint for evaluation or resume training - """ - ckpt_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, str(ckpt_name)) - print("Save model checkpoint to {}".format(ckpt_dir)) - if not os.path.isdir(ckpt_dir): - os.makedirs(ckpt_dir) - - fluid.save(program, os.path.join(ckpt_dir, 'model')) - - return ckpt_dir - - -def load_checkpoint(exe, program): - """ - Load checkpoiont for resuming training - """ - model_path = cfg.TRAIN.RESUME_MODEL_DIR - print('Resume model training from:', model_path) - if not os.path.exists(model_path): - raise ValueError( - "TRAIN.PRETRAIN_MODEL {} not exist!".format(model_path)) - fluid.load(program, os.path.join(model_path, 'model'), exe) - - # Check is path ended by path spearator - if model_path[-1] == os.sep: - model_path = model_path[0:-1] - epoch_name = os.path.basename(model_path) - # If resume model is final model - if epoch_name == 'final': - begin_epoch = cfg.SOLVER.NUM_EPOCHS - # If resume model path is end of digit, restore epoch status - elif epoch_name.isdigit(): - epoch = int(epoch_name) - begin_epoch = epoch + 1 - else: - raise ValueError("Resume model path is not valid!") - print("Model checkpoint loaded successfully!") - return begin_epoch - - -def save_infer_program(test_program, ckpt_dir): - _test_program = test_program.clone() - _test_program.desc.flush() - _test_program.desc._set_version() - paddle.fluid.core.save_op_compatible_info(_test_program.desc) - with open(os.path.join(ckpt_dir, 'model') + ".pdmodel", "wb") as f: - f.write(_test_program.desc.serialize_to_string()) - - -def update_best_model(ckpt_dir): - best_model_dir = os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model') - if os.path.exists(best_model_dir): - shutil.rmtree(best_model_dir) - shutil.copytree(ckpt_dir, best_model_dir) - - -def print_info(*msg): - if cfg.TRAINER_ID == 0: - print(*msg) - - -def train(cfg): - startup_prog = fluid.Program() - train_prog = fluid.Program() - test_prog = fluid.Program() - if args.enable_ce: - startup_prog.random_seed = 1000 - train_prog.random_seed = 1000 - drop_last = True - - dataset = SegDataset( - file_list=cfg.DATASET.TRAIN_FILE_LIST, - mode=ModelPhase.TRAIN, - shuffle=True, - data_dir=cfg.DATASET.DATA_DIR) - - def data_generator(): - if args.use_mpio: - data_gen = dataset.multiprocess_generator( - num_processes=cfg.DATALOADER.NUM_WORKERS, - max_queue_size=cfg.DATALOADER.BUF_SIZE) - else: - data_gen = dataset.generator() - - batch_data = [] - for b in data_gen: - batch_data.append(b) - if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS): - for item in batch_data: - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - yield item[0], item[1], item[2] - else: - yield item[0], item[1], item[2], item[3] - batch_data = [] - # If use sync 
batch norm strategy, drop last batch if number of samples - # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues - if not cfg.TRAIN.SYNC_BATCH_NORM: - for item in batch_data: - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - yield item[0], item[1], item[2] - else: - yield item[0], item[1], item[2], item[3] - - # Get device environment - gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) - place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace() - places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places() - - # Get number of GPU - dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places) - print_info("#Device count: {}".format(dev_count)) - - # Make sure BATCH_SIZE can divided by GPU cards - assert cfg.BATCH_SIZE % dev_count == 0, ( - 'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format( - cfg.BATCH_SIZE, dev_count)) - # If use multi-gpu training mode, batch data will allocated to each GPU evenly - batch_size_per_dev = cfg.BATCH_SIZE // dev_count - print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) - - data_loader, avg_loss, lr, pred, grts, masks = build_model( - train_prog, startup_prog, phase=ModelPhase.TRAIN) - build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL) - data_loader.set_sample_generator( - data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) - - exe = fluid.Executor(place) - exe.run(startup_prog) - - exec_strategy = fluid.ExecutionStrategy() - # Clear temporary variables every 100 iteration - if args.use_gpu: - exec_strategy.num_threads = fluid.core.get_cuda_device_count() - exec_strategy.num_iteration_per_drop_scope = 100 - build_strategy = fluid.BuildStrategy() - - if cfg.NUM_TRAINERS > 1 and args.use_gpu: - dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) - exec_strategy.num_threads = 1 - - if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu: - if dev_count > 1: - # Apply sync batch norm strategy - print_info("Sync BatchNorm strategy is effective.") - build_strategy.sync_batch_norm = True - else: - print_info( - "Sync BatchNorm strategy will not be effective if GPU device" - " count <= 1") - compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( - loss_name=avg_loss.name, - exec_strategy=exec_strategy, - build_strategy=build_strategy) - - # Resume training - begin_epoch = cfg.SOLVER.BEGIN_EPOCH - if cfg.TRAIN.RESUME_MODEL_DIR: - begin_epoch = load_checkpoint(exe, train_prog) - # Load pretrained model - elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR): - load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR) - else: - print_info( - 'Pretrained model dir {} not exists, training from scratch...'. 
- format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) - - fetch_list = [avg_loss.name, lr.name] - if args.debug: - # Fetch more variable info and use streaming confusion matrix to - # calculate IoU results if in debug mode - np.set_printoptions( - precision=4, suppress=True, linewidth=160, floatmode="fixed") - fetch_list.extend([pred.name, grts.name, masks.name]) - cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True) - - if args.use_vdl: - if not args.vdl_log_dir: - print_info("Please specify the log directory by --vdl_log_dir.") - exit(1) - - from visualdl import LogWriter - log_writer = LogWriter(args.vdl_log_dir) - - # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0)) - # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - step = 0 - all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE - if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True: - all_step += 1 - all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1) - - avg_loss = 0.0 - best_mIoU = 0.0 - - timer = Timer() - timer.start() - if begin_epoch > cfg.SOLVER.NUM_EPOCHS: - raise ValueError( - ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format( - begin_epoch, cfg.SOLVER.NUM_EPOCHS)) - - if args.use_mpio: - print_info("Use multiprocess reader") - else: - print_info("Use multi-thread reader") - - for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - data_loader.start() - while True: - try: - if args.debug: - # Print category IoU and accuracy to check whether the - # traning process is corresponed to expectation - loss, lr, pred, grts, masks = exe.run( - program=compiled_train_prog, - fetch_list=fetch_list, - return_numpy=True) - cm.calculate(pred, grts, masks) - avg_loss += np.mean(np.array(loss)) - step += 1 - - if step % args.log_steps == 0: - speed = args.log_steps / timer.elapsed_time() - avg_loss /= args.log_steps - category_acc, mean_acc = cm.accuracy() - category_iou, mean_iou = cm.mean_iou() - - print_info(( - "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}" - ).format(epoch, step, lr[0], avg_loss, mean_acc, - mean_iou, speed, - calculate_eta(all_step - step, speed))) - print_info("Category IoU: ", category_iou) - print_info("Category Acc: ", category_acc) - if args.use_vdl: - log_writer.add_scalar('Train/mean_iou', mean_iou, - step) - log_writer.add_scalar('Train/mean_acc', mean_acc, - step) - log_writer.add_scalar('Train/loss', avg_loss, step) - log_writer.add_scalar('Train/lr', lr[0], step) - log_writer.add_scalar('Train/step/sec', speed, step) - sys.stdout.flush() - avg_loss = 0.0 - cm.zero_matrix() - timer.restart() - else: - # If not in debug mode, avoid unnessary log and calculate - loss, lr = exe.run( - program=compiled_train_prog, - fetch_list=fetch_list, - return_numpy=True) - avg_loss += np.mean(np.array(loss)) - step += 1 - - if step % args.log_steps == 0 and cfg.TRAINER_ID == 0: - avg_loss /= args.log_steps - speed = args.log_steps / timer.elapsed_time() - print(( - "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}" - ).format(epoch, step, lr[0], avg_loss, speed, - calculate_eta(all_step - step, speed))) - if args.use_vdl: - log_writer.add_scalar('Train/loss', avg_loss, step) - log_writer.add_scalar('Train/lr', lr[0], step) - log_writer.add_scalar('Train/speed', speed, step) - sys.stdout.flush() - avg_loss = 0.0 - timer.restart() - - # NOTE : used for benchmark, profiler tools - if args.is_profiler and epoch == 1 and step == args.log_steps: - profiler.start_profiler("All") - elif args.is_profiler and 
epoch == 1 and step == args.log_steps + 5: - profiler.stop_profiler("total", args.profiler_path) - return - - except fluid.core.EOFException: - data_loader.reset() - break - except Exception as e: - print(e) - - if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 - or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0: - ckpt_dir = save_checkpoint(train_prog, epoch) - save_infer_program(test_prog, ckpt_dir) - - if args.do_eval: - print("Evaluation start") - cate_iou, mean_iou, _, mean_acc = evaluate( - cfg=cfg, - ckpt_dir=ckpt_dir, - use_gpu=args.use_gpu, - use_mpio=args.use_mpio) - if args.use_vdl: - log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step) - log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step) - - if cate_iou[0] >= best_mIoU: - best_mIoU = cate_iou[0] - update_best_model(ckpt_dir) - print_info("Save best model {} to {}, mIoU = {:.4f}".format( - ckpt_dir, - os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'), - mean_iou)) - - # Use VisualDL to visualize results - if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None: - visualize( - cfg=cfg, - use_gpu=args.use_gpu, - vis_file_list=cfg.DATASET.VIS_FILE_LIST, - vis_dir="visual", - ckpt_dir=ckpt_dir, - log_writer=log_writer) - - # save final model - if cfg.TRAINER_ID == 0: - ckpt_dir = save_checkpoint(train_prog, 'final') - save_infer_program(test_prog, ckpt_dir) - - -def main(args): - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - if args.enable_ce: - random.seed(0) - np.random.seed(0) - - cfg.TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", 0)) - cfg.NUM_TRAINERS = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - - cfg.check_and_infer() - print_info(pprint.pformat(cfg)) - train(cfg) - - -if __name__ == '__main__': - args = parse_args() - if fluid.core.is_compiled_with_cuda() != True and args.use_gpu == True: - print( - "You can not set use_gpu = True in the model because you are using paddlepaddle-cpu." - ) - print( - "Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_gpu=False to run models on CPU." - ) - sys.exit(1) - main(args) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/__init__.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/__init__.py deleted file mode 100644 index ba1baeb1ac..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/collect.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/collect.py deleted file mode 100644 index 9cde75bbb3..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/collect.py +++ /dev/null @@ -1,177 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A simple attribute dictionary used for representing configuration options.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import copy -import codecs -from ast import literal_eval - -import yaml -import six - - -class SegConfig(dict): - def __init__(self, *args, **kwargs): - super(SegConfig, self).__init__(*args, **kwargs) - self.immutable = False - - def __setattr__(self, key, value, create_if_not_exist=True): - if key in ["immutable"]: - self.__dict__[key] = value - return - - t = self - keylist = key.split(".") - for k in keylist[:-1]: - t = t.__getattr__(k, create_if_not_exist) - - t.__getattr__(keylist[-1], create_if_not_exist) - t[keylist[-1]] = value - - def __getattr__(self, key, create_if_not_exist=True): - if key in ["immutable"]: - return self.__dict__[key] - - if not key in self: - if not create_if_not_exist: - raise KeyError - self[key] = SegConfig() - return self[key] - - def __setitem__(self, key, value): - # - if self.immutable: - raise AttributeError( - 'Attempted to set "{}" to "{}", but SegConfig is immutable'. - format(key, value)) - # - if isinstance(value, six.string_types): - try: - value = literal_eval(value) - except ValueError: - pass - except SyntaxError: - pass - super(SegConfig, self).__setitem__(key, value) - - def update_from_segconfig(self, other): - if isinstance(other, dict): - other = SegConfig(other) - assert isinstance(other, SegConfig) - diclist = [("", other)] - while len(diclist): - prefix, tdic = diclist[0] - diclist = diclist[1:] - for key, value in tdic.items(): - key = "{}.{}".format(prefix, key) if prefix else key - if isinstance(value, dict): - diclist.append((key, value)) - continue - try: - self.__setattr__(key, value, create_if_not_exist=False) - except KeyError: - raise KeyError('Non-existent config key: {}'.format(key)) - - def check_and_infer(self): - if self.DATASET.IMAGE_TYPE in ['rgb', 'gray']: - self.DATASET.DATA_DIM = 3 - elif self.DATASET.IMAGE_TYPE in ['rgba']: - self.DATASET.DATA_DIM = 4 - else: - raise KeyError( - 'DATASET.IMAGE_TYPE config error, only support `rgb`, `gray` and `rgba`' - ) - if self.MEAN is not None: - self.DATASET.PADDING_VALUE = [x * 255.0 for x in self.MEAN] - - if not self.TRAIN_CROP_SIZE: - raise ValueError( - 'TRAIN_CROP_SIZE is empty! Please set a pair of values in format (width, height)' - ) - - if not self.EVAL_CROP_SIZE: - raise ValueError( - 'EVAL_CROP_SIZE is empty! 
Please set a pair of values in format (width, height)' - ) - - # Ensure file list is use UTF-8 encoding - train_sets = codecs.open(self.DATASET.TRAIN_FILE_LIST, 'r', - 'utf-8').readlines() - val_sets = codecs.open(self.DATASET.VAL_FILE_LIST, 'r', - 'utf-8').readlines() - test_sets = codecs.open(self.DATASET.TEST_FILE_LIST, 'r', - 'utf-8').readlines() - self.DATASET.TRAIN_TOTAL_IMAGES = len(train_sets) - self.DATASET.VAL_TOTAL_IMAGES = len(val_sets) - self.DATASET.TEST_TOTAL_IMAGES = len(test_sets) - - if self.MODEL.MODEL_NAME == 'icnet' and \ - len(self.MODEL.MULTI_LOSS_WEIGHT) != 3: - self.MODEL.MULTI_LOSS_WEIGHT = [1.0, 0.4, 0.16] - - if self.AUG.AUG_METHOD not in [ - 'unpadding', 'stepscaling', 'rangescaling' - ]: - raise ValueError( - 'AUG.AUG_METHOD config error, only support `unpadding`, `unpadding` and `rangescaling`' - ) - - if self.DATASET.INPUT_IMAGE_NUM not in [1, 2]: - raise ValueError("DATASET.INPUT_IMAGE_NUM should be 1 or 2") - - if self.TEST.TEST_AUG: - if len(self.TEST.TEST_AUG_FLIP_OPS) + len( - self.TEST.TEST_AUG_ROTATE_OPS) == 0: - raise ValueError('Must define one ops at least for test aug.') - for ops in self.TEST.TEST_AUG_FLIP_OPS: - if ops[0] not in ['h', 'v', 'm', 'b']: - raise ValueError( - 'Error flip ops type. ' - 'Only horizontal, vertical, main_diagonal, back_diagonal supported.' - ) - for ops in self.TEST.TEST_AUG_ROTATE_OPS: - if ops not in [90, 180, 270]: - raise ValueError( - 'Error rotation angle. Only 90, 180, 270 supported.') - - def update_from_list(self, config_list): - if len(config_list) % 2 != 0: - raise ValueError( - "Command line options config format error! Please check it: {}". - format(config_list)) - for key, value in zip(config_list[0::2], config_list[1::2]): - try: - self.__setattr__(key, value, create_if_not_exist=False) - except KeyError: - raise KeyError('Non-existent config key: {}'.format(key)) - - def update_from_file(self, config_file): - with codecs.open(config_file, 'r', 'utf-8') as file: - dic = yaml.load(file, Loader=yaml.FullLoader) - self.update_from_segconfig(dic) - - def set_immutable(self, immutable): - self.immutable = immutable - for value in self.values(): - if isinstance(value, SegConfig): - value.set_immutable(immutable) - - def is_immutable(self): - return self.immutable diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/config.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/config.py deleted file mode 100644 index 5f21a0c3d8..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/config.py +++ /dev/null @@ -1,274 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
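The `SegConfig` helper deleted just above (legacy/contrib/NeurIPS_SN7/pdseg/utils/collect.py) is an attribute dictionary keyed by dotted paths. Purely for reference, a minimal, hypothetical usage sketch follows; the import path matches the removed legacy package and the keys and values are illustrative only, not taken from this patch:

```python
from utils.collect import SegConfig  # path of the deleted legacy module

cfg = SegConfig()
cfg.SOLVER.LR = 0.01            # attribute access auto-creates nested SegConfigs
cfg["BATCH_SIZE"] = 4           # plain dict item access also works
cfg.update_from_list(["SOLVER.LR", "0.02"])  # CLI-style "key value" overrides
cfg.set_immutable(True)         # any further write now raises AttributeError
```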
- -from __future__ import print_function -from __future__ import unicode_literals -from utils.collect import SegConfig -import numpy as np - -cfg = SegConfig() - -########################## 基本配置 ########################################### -# 均值,图像预处理减去的均值 -cfg.MEAN = [0.5, 0.5, 0.5] -# 标准差,图像预处理除以标准差· -cfg.STD = [0.5, 0.5, 0.5] -# 批处理大小 -cfg.BATCH_SIZE = 1 -# 验证时图像裁剪尺寸(宽,高) -cfg.EVAL_CROP_SIZE = tuple() -# 训练时图像裁剪尺寸(宽,高) -cfg.TRAIN_CROP_SIZE = tuple() -# 多进程训练总进程数 -cfg.NUM_TRAINERS = 1 -# 多进程训练进程ID -cfg.TRAINER_ID = 0 -########################## 数据载入配置 ####################################### -# 数据载入时的并发数, 建议值8 -cfg.DATALOADER.NUM_WORKERS = 8 -# 数据载入时缓存队列大小, 建议值256 -cfg.DATALOADER.BUF_SIZE = 256 - -########################## 数据集配置 ######################################### -# 输入为 img + label or img1 + img2 + label or img1 + img2 + label1 + label2 -cfg.DATASET.INPUT_IMAGE_NUM = 1 -# 数据主目录目录 -cfg.DATASET.DATA_DIR = './dataset/cityscapes/' -# 训练集列表 -cfg.DATASET.TRAIN_FILE_LIST = './dataset/cityscapes/train.list' -# 训练集数量 -cfg.DATASET.TRAIN_TOTAL_IMAGES = 2975 -# 验证集列表 -cfg.DATASET.VAL_FILE_LIST = './dataset/cityscapes/val.list' -# 验证数据数量 -cfg.DATASET.VAL_TOTAL_IMAGES = 500 -# 测试数据列表 -cfg.DATASET.TEST_FILE_LIST = './dataset/cityscapes/test.list' -# 测试数据数量 -cfg.DATASET.TEST_TOTAL_IMAGES = 500 -# VisualDL 可视化的数据集 -cfg.DATASET.VIS_FILE_LIST = None -# 类别数(需包括背景类) -cfg.DATASET.NUM_CLASSES = 19 -# 输入图像类型, 支持三通道'rgb',四通道'rgba',单通道灰度图'gray' -cfg.DATASET.IMAGE_TYPE = 'rgb' -# 输入图片的通道数 -cfg.DATASET.DATA_DIM = 3 -# 数据列表分割符, 默认为空格 -cfg.DATASET.SEPARATOR = ' ' -# 忽略的像素标签值, 默认为255,一般无需改动 -cfg.DATASET.IGNORE_INDEX = 255 -# 数据增强是图像的padding值 -cfg.DATASET.PADDING_VALUE = [127.5, 127.5, 127.5] - -########################### 数据增强配置 ###################################### -# 图像resize的方式有三种: -# unpadding(固定尺寸),stepscaling(按比例resize),rangescaling(长边对齐) -cfg.AUG.AUG_METHOD = 'unpadding' -# 图像resize的固定尺寸(宽,高),非负 -cfg.AUG.FIX_RESIZE_SIZE = (512, 512) -# 图像resize方式为stepscaling,resize最小尺度,非负 -cfg.AUG.MIN_SCALE_FACTOR = 0.5 -# 图像resize方式为stepscaling,resize最大尺度,不小于MIN_SCALE_FACTOR -cfg.AUG.MAX_SCALE_FACTOR = 2.0 -# 图像resize方式为stepscaling,resize尺度范围间隔,非负 -cfg.AUG.SCALE_STEP_SIZE = 0.25 -# 图像resize方式为rangescaling,训练时长边resize的范围最小值,非负 -cfg.AUG.MIN_RESIZE_VALUE = 400 -# 图像resize方式为rangescaling,训练时长边resize的范围最大值, -# 不小于MIN_RESIZE_VALUE -cfg.AUG.MAX_RESIZE_VALUE = 600 -# 图像resize方式为rangescaling, 测试验证可视化模式下长边resize的长度, -# 在MIN_RESIZE_VALUE到MAX_RESIZE_VALUE范围内 -cfg.AUG.INF_RESIZE_VALUE = 500 - -# 图像镜像左右翻转 -cfg.AUG.MIRROR = True -# 图像上下翻转开关,True/False -cfg.AUG.FLIP = False -# 图像启动上下翻转的概率,0-1 -cfg.AUG.FLIP_RATIO = 0.5 -# 图像随机旋转0-270度 -cfg.AUG.RANDOM_ROTATION90 = False - -# RichCrop数据增广开关,用于提升模型鲁棒性 -cfg.AUG.RICH_CROP.ENABLE = False -# 图像旋转最大角度,0-90 -cfg.AUG.RICH_CROP.MAX_ROTATION = 15 -# 裁取图像与原始图像面积比,0-1 -cfg.AUG.RICH_CROP.MIN_AREA_RATIO = 0.5 -# 裁取图像宽高比范围,非负 -cfg.AUG.RICH_CROP.ASPECT_RATIO = 0.33 -# 亮度调节范围,0-1 -cfg.AUG.RICH_CROP.BRIGHTNESS_JITTER_RATIO = 0.5 -# 饱和度调节范围,0-1 -cfg.AUG.RICH_CROP.SATURATION_JITTER_RATIO = 0.5 -# 对比度调节范围,0-1 -cfg.AUG.RICH_CROP.CONTRAST_JITTER_RATIO = 0.5 -# 图像模糊开关,True/False -cfg.AUG.RICH_CROP.BLUR = False -# 图像启动模糊百分比,0-1 -cfg.AUG.RICH_CROP.BLUR_RATIO = 0.1 - -########################### 训练配置 ########################################## -# 模型保存路径 -cfg.TRAIN.MODEL_SAVE_DIR = '' -# 预训练模型路径 -cfg.TRAIN.PRETRAINED_MODEL_DIR = '' -# 是否resume,继续训练 -cfg.TRAIN.RESUME_MODEL_DIR = '' -# 是否使用多卡间同步BatchNorm均值和方差 -cfg.TRAIN.SYNC_BATCH_NORM = False -# 模型参数保存的epoch间隔数,可用来继续训练中断的模型 -cfg.TRAIN.SNAPSHOT_EPOCH = 1 - 
-########################### Solver (optimization) settings ##################################
-# Initial learning rate
-cfg.SOLVER.LR = 0.1
-# Learning rate decay policy; poly, piecewise and cosine are supported
-cfg.SOLVER.LR_POLICY = "poly"
-# Optimizer; SGD and Adam are supported
-cfg.SOLVER.OPTIMIZER = "sgd"
-# Momentum
-cfg.SOLVER.MOMENTUM = 0.9
-# Exponential decay rate of the second-moment estimates
-cfg.SOLVER.MOMENTUM2 = 0.999
-# Power of the poly learning rate decay
-cfg.SOLVER.POWER = 0.9
-# Decay factor for piecewise (step) decay
-cfg.SOLVER.GAMMA = 0.1
-# Epochs at which piecewise (step) decay is applied
-cfg.SOLVER.DECAY_EPOCH = [10, 20]
-# Weight decay, in the range 0-1
-cfg.SOLVER.WEIGHT_DECAY = 0.00004
-# Epoch at which training starts, defaults to 1
-cfg.SOLVER.BEGIN_EPOCH = 1
-# Number of training epochs, a positive integer
-cfg.SOLVER.NUM_EPOCHS = 30
-# Loss selection; softmax_loss, bce_loss and dice_loss are supported
-cfg.SOLVER.LOSS = ["softmax_loss"]
-# Loss weights for combining multiple losses; only effective for losses listed in SOLVER.LOSS
-cfg.SOLVER.LOSS_WEIGHT.SOFTMAX_LOSS = 1
-cfg.SOLVER.LOSS_WEIGHT.DICE_LOSS = 1
-cfg.SOLVER.LOSS_WEIGHT.BCE_LOSS = 1
-cfg.SOLVER.LOSS_WEIGHT.LOVASZ_HINGE_LOSS = 1
-cfg.SOLVER.LOSS_WEIGHT.LOVASZ_SOFTMAX_LOSS = 1
-# Whether to enable learning rate warmup
-cfg.SOLVER.LR_WARMUP = False
-# Number of warmup iterations
-cfg.SOLVER.LR_WARMUP_STEPS = 2000
-# Cross entropy class weights. Defaults to None. If set to 'dynamic', class weights are
-# adjusted dynamically according to the per-class counts in each batch.
-# A static weight list can also be given, e.g. for 3 classes: [0.1, 2.0, 0.9]
-cfg.SOLVER.CROSS_ENTROPY_WEIGHT = None
-########################## Test settings ###########################################
-# Path of the model used for testing
-cfg.TEST.TEST_MODEL = ''
-cfg.TEST.TEST_AUG = False
-cfg.TEST.TEST_AUG_FLIP_OPS = [
-    'horizontal', 'vertical', 'main_diagonal', 'back_diagonal'
-]
-cfg.TEST.TEST_AUG_ROTATE_OPS = [90, 180, 270]
-
-########################## Visualization settings ###########################################
-cfg.VIS.ADD_LABEL = True
-cfg.VIS.RAW_PRED = False
-cfg.VIS.SEG_FOR_CD = False
-cfg.VIS.VISINEVAL = False
-
-########################## Common model settings #######################################
-# Model name; deeplabv3p, unet, icnet, pspnet and hrnet are supported
-cfg.MODEL.MODEL_NAME = ''
-# Normalization type: bn or gn (group_norm)
-cfg.MODEL.DEFAULT_NORM_TYPE = 'bn'
-# Weights for multi-branch losses
-cfg.MODEL.MULTI_LOSS_WEIGHT = [1.0]
-# Number of groups when DEFAULT_NORM_TYPE is gn
-cfg.MODEL.DEFAULT_GROUP_NUMBER = 32
-# Small epsilon that guards against division by zero; usually no need to change
-cfg.MODEL.DEFAULT_EPSILON = 1e-5
-# BatchNorm momentum; usually no need to change
-cfg.MODEL.BN_MOMENTUM = 0.99
-# Whether to train with FP16
-cfg.MODEL.FP16 = False
-# Mixed-precision training requires loss scaling; defaults to dynamic scaling,
-# a static scale such as 512.0 can also be set
-cfg.MODEL.SCALE_LOSS = "DYNAMIC"
-
-########################## DeepLab model settings ####################################
-# DeepLab backbone; xception_65 and mobilenetv2 are available
-cfg.MODEL.DEEPLAB.BACKBONE = "xception_65"
-# DeepLab output stride
-cfg.MODEL.DEEPLAB.OUTPUT_STRIDE = 16
-# MobileNet v2 backbone depth multiplier
-cfg.MODEL.DEEPLAB.DEPTH_MULTIPLIER = 1.0
-# Whether the encoder uses ASPP
-cfg.MODEL.DEEPLAB.ENCODER_WITH_ASPP = True
-# Whether to enable the decoder
-cfg.MODEL.DEEPLAB.ENABLE_DECODER = True
-# Whether ASPP uses separable convolutions
-cfg.MODEL.DEEPLAB.ASPP_WITH_SEP_CONV = True
-# Whether the decoder uses separable convolutions
-cfg.MODEL.DEEPLAB.DECODER_USE_SEP_CONV = True
-
-########################## UNet model settings #######################################
-# Upsampling mode, defaults to bilinear interpolation
-cfg.MODEL.UNET.UPSAMPLE_MODE = 'bilinear'
-
-########################## ICNet model settings ######################################
-# ResNet backbone depth multiplier
-cfg.MODEL.ICNET.DEPTH_MULTIPLIER = 0.5
-# Number of ResNet layers
-cfg.MODEL.ICNET.LAYERS = 50
-
-########################## PSPNet model settings ######################################
-# ResNet backbone depth multiplier
-cfg.MODEL.PSPNET.DEPTH_MULTIPLIER = 1
-# Number of ResNet backbone layers
-cfg.MODEL.PSPNET.LAYERS = 50
-
-########################## HRNet model settings ######################################
-# HRNet STAGE2 settings
-cfg.MODEL.HRNET.STAGE2.NUM_MODULES = 1
-cfg.MODEL.HRNET.STAGE2.NUM_CHANNELS = [40, 80]
-# HRNet STAGE3 settings
-cfg.MODEL.HRNET.STAGE3.NUM_MODULES = 4
-cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160]
-# HRNet STAGE4 settings
-cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3
-cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320]
-
-########################## Inference / deployment model settings ###################################
-# Filename of the exported inference model
-cfg.FREEZE.MODEL_FILENAME = '__model__'
-# Filename of the exported inference parameters
-cfg.FREEZE.PARAMS_FILENAME = '__params__'
-# Directory where the exported inference model is saved
-cfg.FREEZE.SAVE_DIR = 'freeze_model'
-
-########################## paddle-slim ######################################
-cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER = False
-cfg.SLIM.KNOWLEDGE_DISTILL = False
-cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR = ""
-
-cfg.SLIM.NAS_PORT = 23333
-cfg.SLIM.NAS_ADDRESS = ""
-cfg.SLIM.NAS_SEARCH_STEPS = 100
-cfg.SLIM.NAS_START_EVAL_EPOCH = 0
-cfg.SLIM.NAS_IS_SERVER = True
-cfg.SLIM.NAS_SPACE_NAME = ""
-
-cfg.SLIM.PRUNE_PARAMS = ''
-cfg.SLIM.PRUNE_RATIOS = []
-cfg.SLIM.PREPROCESS = False
diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/dist_utils.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/dist_utils.py
deleted file mode 100644
index ce256e8c73..0000000000
--- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/dist_utils.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
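The SOLVER/MODEL defaults above are normally left untouched and overridden per experiment through a YAML file plus key/value pairs, following the `cfg.update_from_file` / `cfg.update_from_list` pattern used by the pdseg tools; a minimal sketch, with the YAML path chosen for illustration:

```python
from utils.config import cfg  # the global config object defined in this module

# Load an experiment YAML, then apply command-line style overrides.
cfg.update_from_file("hrnet_sn7.yaml")  # illustrative config path
cfg.update_from_list([
    "SOLVER.LR", "0.01",
    "SOLVER.NUM_EPOCHS", "40",
    "MODEL.MODEL_NAME", "hrnet",
])
cfg.check_and_infer()  # validate and derive dependent fields, as in pdseg/vis.py
print(cfg.SOLVER.LR_POLICY, cfg.MODEL.DEFAULT_NORM_TYPE)
```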
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import paddle.fluid as fluid - - -def nccl2_prepare(args, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - - envs = args.dist_env - - t.transpile( - envs["trainer_id"], - trainers=','.join(envs["trainer_endpoints"]), - current_endpoint=envs["current_endpoint"], - startup_program=startup_prog, - program=main_prog) - - -def pserver_prepare(args, train_prog, startup_prog): - config = fluid.DistributeTranspilerConfig() - config.slice_var_up = args.split_var - t = fluid.DistributeTranspiler(config=config) - envs = args.dist_env - training_role = envs["training_role"] - - t.transpile( - envs["trainer_id"], - program=train_prog, - pservers=envs["pserver_endpoints"], - trainers=envs["num_trainers"], - sync_mode=not args.async_mode, - startup_program=startup_prog) - if training_role == "PSERVER": - pserver_program = t.get_pserver_program(envs["current_endpoint"]) - pserver_startup_program = t.get_startup_program( - envs["current_endpoint"], - pserver_program, - startup_program=startup_prog) - return pserver_program, pserver_startup_program - elif training_role == "TRAINER": - train_program = t.get_trainer_program() - return train_program, startup_prog - else: - raise ValueError( - 'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER' - ) - - -def nccl2_prepare_paddle(trainer_id, startup_prog, main_prog): - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile( - trainer_id, - trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'), - current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'), - startup_program=startup_prog, - program=main_prog) - - -def prepare_for_multi_process(exe, build_strategy, train_prog): - # prepare for multi-process - trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0)) - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if num_trainers < 2: return - - build_strategy.num_trainers = num_trainers - build_strategy.trainer_id = trainer_id - # NOTE(zcd): use multi processes to train the model, - # and each process use one GPU card. - startup_prog = fluid.Program() - nccl2_prepare_paddle(trainer_id, startup_prog, train_prog) - # the startup_prog are run two times, but it doesn't matter. - exe.run(startup_prog) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/fp16_utils.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/fp16_utils.py deleted file mode 100644 index 7359cf4aca..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/fp16_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
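The `prepare_for_multi_process` helper above is a no-op for single-card runs and otherwise configures NCCL2 multi-process training from the `PADDLE_TRAINER_ID` / `PADDLE_TRAINERS_NUM` environment variables; a rough sketch of how a legacy fluid training script would wire it in, with the executor, strategy and program names purely illustrative:

```python
import paddle.fluid as fluid

from utils.dist_utils import prepare_for_multi_process  # import path assumed from the pdseg layout

place = fluid.CUDAPlace(0)  # one GPU per process under multi-process launch
exe = fluid.Executor(place)
build_strategy = fluid.BuildStrategy()
train_prog = fluid.default_main_program()

# Returns immediately when PADDLE_TRAINERS_NUM < 2; otherwise it sets the trainer
# id/count on the build strategy and runs the NCCL2 transpiler startup program.
prepare_for_multi_process(exe, build_strategy, train_prog)
```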
- -import os -from paddle import fluid - - -def load_fp16_vars(executor, dirname, program): - load_dirname = os.path.normpath(dirname) - - def _if_exist(var): - name = var.name[:-7] if var.name.endswith('.master') else var.name - b = os.path.exists(os.path.join(load_dirname, name)) - if not b and isinstance(var, fluid.framework.Parameter): - print("===== {} not found ====".format(var.name)) - return b - - load_prog = fluid.Program() - load_block = load_prog.global_block() - vars = list(filter(_if_exist, program.list_vars())) - - for var in vars: - new_var = fluid.io._clone_var_in_block_(load_block, var) - name = var.name[:-7] if var.name.endswith('.master') else var.name - file_path = os.path.join(load_dirname, name) - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [new_var]}, - attrs={ - 'file_path': file_path, - 'load_as_fp16': var.dtype == fluid.core.VarDesc.VarType.FP16 - }) - - executor.run(load_prog) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/load_model_utils.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/load_model_utils.py deleted file mode 100644 index 9166744184..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/load_model_utils.py +++ /dev/null @@ -1,128 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import os.path as osp - -import six -import numpy as np - - -def parse_param_file(param_file, return_shape=True): - from paddle.fluid.proto.framework_pb2 import VarType - f = open(param_file, 'rb') - version = np.fromstring(f.read(4), dtype='int32') - lod_level = np.fromstring(f.read(8), dtype='int64') - for i in range(int(lod_level)): - _size = np.fromstring(f.read(8), dtype='int64') - _ = f.read(_size) - version = np.fromstring(f.read(4), dtype='int32') - tensor_desc = VarType.TensorDesc() - tensor_desc_size = np.fromstring(f.read(4), dtype='int32') - tensor_desc.ParseFromString(f.read(int(tensor_desc_size))) - tensor_shape = tuple(tensor_desc.dims) - if return_shape: - f.close() - return tuple(tensor_desc.dims) - if tensor_desc.data_type != 5: - raise Exception( - "Unexpected data type while parse {}".format(param_file)) - data_size = 4 - for i in range(len(tensor_shape)): - data_size *= tensor_shape[i] - weight = np.fromstring(f.read(data_size), dtype='float32') - f.close() - return np.reshape(weight, tensor_shape) - - -def load_pdparams(exe, main_prog, model_dir): - import paddle.fluid as fluid - from paddle.fluid.proto.framework_pb2 import VarType - from paddle.fluid.framework import Program - - vars_to_load = list() - vars_not_load = list() - import pickle - with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: - params_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - unused_vars = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if var.name not in params_dict: - print("{} is not in saved model".format(var.name)) - vars_not_load.append(var.name) - continue - if var.shape != params_dict[var.name].shape: - unused_vars.append(var.name) - vars_not_load.append(var.name) - print( - "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})" - .format(var.name, params_dict[var.name].shape, var.shape)) - continue - vars_to_load.append(var) - for var_name in unused_vars: - del params_dict[var_name] - fluid.io.set_program_state(main_prog, params_dict) - - if len(vars_to_load) == 0: - print( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" 
- ) - else: - print("There are {}/{} varaibles in {} are loaded.".format( - len(vars_to_load), - len(vars_to_load) + len(vars_not_load), model_dir)) - - -def load_pretrained_weights(exe, main_prog, weights_dir): - if not osp.exists(weights_dir): - raise Exception("Path {} not exists.".format(weights_dir)) - if osp.exists(osp.join(weights_dir, "model.pdparams")): - return load_pdparams(exe, main_prog, weights_dir) - import paddle.fluid as fluid - vars_to_load = list() - vars_not_load = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if not osp.exists(osp.join(weights_dir, var.name)): - print("[SKIP] Pretrained weight {}/{} doesn't exist".format( - weights_dir, var.name)) - vars_not_load.append(var) - continue - pretrained_shape = parse_param_file(osp.join(weights_dir, var.name)) - actual_shape = tuple(var.shape) - if pretrained_shape != actual_shape: - print( - "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})" - .format(weights_dir, var.name, pretrained_shape, actual_shape)) - vars_not_load.append(var) - continue - vars_to_load.append(var) - - params_dict = fluid.io.load_program_state( - weights_dir, var_list=vars_to_load) - fluid.io.set_program_state(main_prog, params_dict) - - if len(vars_to_load) == 0: - print( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" - ) - else: - print("There are {}/{} varaibles in {} are loaded.".format( - len(vars_to_load), - len(vars_to_load) + len(vars_not_load), weights_dir)) diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/utils/timer.py b/legacy/contrib/NeurIPS_SN7/pdseg/utils/timer.py deleted file mode 100644 index 6cdd6014c2..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/utils/timer.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import time - - -def calculate_eta(remaining_step, speed): - if remaining_step < 0: - remaining_step = 0 - remaining_time = int(remaining_step / speed) - result = "{:0>2}:{:0>2}:{:0>2}" - arr = [] - for i in range(2, -1, -1): - arr.append(int(remaining_time / 60**i)) - remaining_time %= 60**i - return result.format(*arr) - - -class Timer(object): - """ Simple timer class for measuring time consuming """ - - def __init__(self): - self._start_time = 0.0 - self._end_time = 0.0 - self._elapsed_time = 0.0 - self._is_running = False - - def start(self): - self._is_running = True - self._start_time = time.time() - - def restart(self): - self.start() - - def stop(self): - self._is_running = False - self._end_time = time.time() - - def elapsed_time(self): - self._end_time = time.time() - self._elapsed_time = self._end_time - self._start_time - if not self.is_running: - return 0.0 - - return self._elapsed_time - - @property - def is_running(self): - return self._is_running diff --git a/legacy/contrib/NeurIPS_SN7/pdseg/vis.py b/legacy/contrib/NeurIPS_SN7/pdseg/vis.py deleted file mode 100644 index cc2fcb6051..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pdseg/vis.py +++ /dev/null @@ -1,267 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
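`calculate_eta` and the `Timer` class above are small progress-reporting utilities; a minimal usage sketch, with the step counts made up for illustration:

```python
import time

from utils.timer import Timer, calculate_eta  # import path assumed from the pdseg layout

timer = Timer()
timer.start()
time.sleep(1)  # stands in for a batch of training work
steps_done, steps_total = 100, 1000
speed = steps_done / timer.elapsed_time()  # steps per second
# calculate_eta formats the remaining time as an "HH:MM:SS" string
print("ETA:", calculate_eta(steps_total - steps_done, speed))
```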
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -# GPU memory garbage collection optimization flags -os.environ['FLAGS_eager_delete_tensor_gb'] = "0.0" - -import sys -import argparse -import pprint -import cv2 -import numpy as np -import paddle.fluid as fluid - -from PIL import Image as PILImage -from utils.config import cfg -from reader import SegDataset -from models.model_builder import build_model -from models.model_builder import ModelPhase -from tools.gray2pseudo_color import get_color_map_list - - -def parse_args(): - parser = argparse.ArgumentParser(description='PaddeSeg visualization tools') - parser.add_argument( - '--cfg', - dest='cfg_file', - help='Config file for training (and optionally testing)', - default=None, - type=str) - parser.add_argument( - '--use_gpu', dest='use_gpu', help='Use gpu or cpu', action='store_true') - parser.add_argument( - '--vis_dir', - dest='vis_dir', - help='visual save dir', - type=str, - default='visual') - parser.add_argument( - '--local_test', - dest='local_test', - help='if in local test mode, only visualize 5 images for testing', - action='store_true') - parser.add_argument( - 'opts', - help='See config.py for all options', - default=None, - nargs=argparse.REMAINDER) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def makedirs(directory): - if not os.path.exists(directory): - os.makedirs(directory) - - -def to_png_fn(fn): - """ - Append png as filename postfix - """ - directory, filename = os.path.split(fn) - basename, ext = os.path.splitext(filename) - - return basename + ".png" - - -def visualize(cfg, - vis_file_list=None, - use_gpu=False, - vis_dir="visual", - ckpt_dir=None, - log_writer=None, - local_test=False, - **kwargs): - if vis_file_list is None: - vis_file_list = cfg.DATASET.TEST_FILE_LIST - dataset = SegDataset( - file_list=vis_file_list, - mode=ModelPhase.VISUAL, - data_dir=cfg.DATASET.DATA_DIR) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL) - # Clone forward graph - test_prog = test_prog.clone(for_test=True) - - # Generator full colormap for maximum 256 classes - color_map = get_color_map_list( - cfg.DATASET.NUM_CLASSES**2 if cfg.DATASET.NUM_CLASSES**2 < 256 else 256) - - # Get device environment - place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir - - if ckpt_dir is not None: - print('load test model:', ckpt_dir) - try: - fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe) - except: - fluid.io.load_params(exe, ckpt_dir, main_program=test_prog) - - save_dir = vis_dir - makedirs(save_dir) - - fetch_list = [pred.name] - test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True) - img_cnt = 0 - - def exe_run(): - if cfg.DATASET.INPUT_IMAGE_NUM == 1: - for imgs, grts, img_names, valid_shapes, org_shapes in test_reader: - pred_shape = (imgs.shape[2], imgs.shape[3]) - pred, = exe.run( - program=test_prog, - feed={'image1': imgs}, - fetch_list=fetch_list, - return_numpy=True) - yield pred, pred_shape, grts, img_names, valid_shapes, org_shapes - else: - for img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes in test_reader: - pred_shape = (img1s.shape[2], img1s.shape[3]) - pred, = exe.run( - program=test_prog, - feed={ - 'image1': img1s, - 'image2': 
img2s - }, - fetch_list=fetch_list, - return_numpy=True) - yield pred, pred_shape, grts, img1_names, valid_shapes, org_shapes - - for pred, pred_shape, grts, img_names, valid_shapes, org_shapes in exe_run( - ): - idx = pred.shape[0] - if cfg.DATASET.INPUT_IMAGE_NUM == 2 and cfg.VIS.SEG_FOR_CD: - idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM - pred1, pred2 = pred[:idx], pred[ - idx:] # fluid.layers.split(pred, 2, dim=0) - num_imgs = pred1.shape[0] - # TODO: use multi-thread to write images - for i in range(num_imgs): - # Add more comments - res_map_list = [] - for pred in [pred1, pred2]: - if pred.shape[0] == 0: - continue - #res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8) - res_map = np.squeeze(pred[i, :, :, :]).astype(np.float32) - res_shape = (res_map.shape[0], res_map.shape[1]) - if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[ - 1]: - res_map = cv2.resize( - res_map, pred_shape, interpolation=cv2.INTER_NEAREST) - valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1]) - res_map = res_map[0:valid_shape[0], 0:valid_shape[1]] - org_shape = (org_shapes[i, 0], org_shapes[i, 1]) - res_map = cv2.resize( - res_map, (org_shape[1], org_shape[0]), - interpolation=cv2.INTER_NEAREST) - res_map_list.append(res_map) - - img_name = img_names[i] - png_fn = to_png_fn(img_name) - - # colorful segment result visualization - vis_fn = os.path.join(save_dir, png_fn) - dirname = os.path.dirname(vis_fn) - makedirs(dirname) - - if cfg.DATASET.INPUT_IMAGE_NUM == 1 or \ - (cfg.DATASET.INPUT_IMAGE_NUM == 2 and not cfg.VIS.SEG_FOR_CD): - res_map = res_map_list[0] - if cfg.VIS.RAW_PRED: - #pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L') - #pred_mask.save(vis_fn) - np.save( - vis_fn.replace(".png", ".npy"), - res_map.astype(np.float32)) - else: - if cfg.VIS.ADD_LABEL: - grt_im = cv2.resize( - grts[i], - pred_shape, - interpolation=cv2.INTER_NEAREST) - res_map = np.hstack((res_map, grt_im)) - pred_mask = PILImage.fromarray( - res_map.astype(np.uint8), mode='P') - pred_mask.putpalette(color_map) - pred_mask.save(vis_fn) - else: - res_map1, res_map2 = res_map_list - diff = res_map1 * cfg.DATASET.NUM_CLASSES + res_map2 - unchange_idx = np.where((res_map1 - res_map2) == 0) - diff[unchange_idx] = 0 - res_map = np.hstack((res_map1, res_map2, diff)) - pred_mask = PILImage.fromarray( - res_map.astype(np.uint8), mode='P') - pred_mask.putpalette(color_map) - pred_mask.save(vis_fn) - - img_cnt += 1 - print("#{} visualize image path: {}".format(img_cnt, vis_fn)) - - # Use VisualDL to visualize image - if log_writer is not None: - # Calulate epoch from ckpt_dir folder name - epoch = int(os.path.split(ckpt_dir)[-1]) - print("VisualDL visualization epoch", epoch) - - pred_mask_np = np.array(pred_mask.convert("RGB")) - log_writer.add_image("Predict/{}".format(img_name), - pred_mask_np, epoch) - # Original image - # BGR->RGB - img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR, - img_name))[..., ::-1] - log_writer.add_image("Images/{}".format(img_name), img, epoch) - # add ground truth (label) images - grt = grts[i] - if grt is not None: - grt = grt[0:valid_shape[0], 0:valid_shape[1]] - grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P') - grt_pil.putpalette(color_map) - grt_pil = grt_pil.resize((org_shape[1], org_shape[0])) - grt = np.array(grt_pil.convert("RGB")) - log_writer.add_image("Label/{}".format(img_name), grt, - epoch) - - # If in local_test mode, only visualize 5 images just for testing - # procedure - if local_test and img_cnt >= 5: - break - - -if __name__ == 
'__main__': - args = parse_args() - if args.cfg_file is not None: - cfg.update_from_file(args.cfg_file) - if args.opts: - cfg.update_from_list(args.opts) - cfg.check_and_infer() - print(pprint.pformat(cfg)) - visualize(cfg, **args.__dict__) diff --git a/legacy/contrib/NeurIPS_SN7/postprocess.py b/legacy/contrib/NeurIPS_SN7/postprocess.py deleted file mode 100644 index a0f3149e23..0000000000 --- a/legacy/contrib/NeurIPS_SN7/postprocess.py +++ /dev/null @@ -1,363 +0,0 @@ -import os -import re -import time -import random -import sys -import multiprocessing -import warnings -warnings.filterwarnings('ignore') - -import numpy as np -from PIL import Image -import cv2 -import skimage.io -from skimage.draw import polygon -from skimage import measure -from skimage.segmentation import watershed -from skimage.feature import peak_local_max -from scipy import ndimage as ndi -from rasterio import features - -import solaris as sol -from shapely.ops import cascaded_union -from shapely.geometry import shape, Polygon - - -def get_respond_img(npy_file): - sp = npy_file.split('.')[0].split('_') - aoi = '_'.join(sp[5:]) - src_img_path = os.path.join(src_img_root, aoi, "images_masked", - npy_file.replace(".npy", ".tif")) - return src_img_path - - -def get_building_polygon(mask, - thres_h, - thres_l, - distance, - min_area, - polygon_buffer, - conn=2, - watershed_line=True): - mask0 = mask > thres_h - local_maxi = peak_local_max( - mask, - indices=False, - footprint=np.ones((distance * 2 + 1, distance * 2 + 1)), - labels=(mask > thres_l)) - local_maxi[mask0] = True - seed_msk = ndi.label(local_maxi)[0] - - mask = watershed( - -mask, seed_msk, mask=(mask > thres_l), watershed_line=watershed_line) - mask = measure.label(mask, connectivity=conn, background=0).astype('uint8') - - geoms_np = [] - geoms_polygons = [] - polygon_generator = features.shapes(mask, mask) - for polygon, value in polygon_generator: - p = shape(polygon) - if polygon_buffer: - p = p.buffer(polygon_buffer) - if p.area >= min_area: - p = p.simplify(tolerance=0.5) - geoms_polygons.append(p) - try: - p = np.array(p.boundary.xy, dtype='int32').T - except: - p = np.array(p.boundary[0].xy, dtype='int32').T - geoms_np.append(p) - return geoms_np, geoms_polygons - - -def get_idx_in_polygon(contour): - contour = contour.reshape(-1, 2) - 0.5 - p = Polygon(contour).buffer(-0.25) - r, c = p.boundary.xy - rr, cc = polygon(r, c) - return rr, cc - - -def save_as_csv(file_path, npy_list, contours, footprints, npys): - aoi = '_'.join(npy_list[0].split('/')[-1].split('.')[0].split('_')[5:]) - print('save csv: %s, npoly = %d' % (aoi, footprints.sum())) - fw = open(file_path[:-4] + '_' + aoi + '.csv', 'w') - for j, contour in enumerate(contours): - contour = contour / 3 - p = Polygon(contour) - p = p.simplify(tolerance=0.2) - try: - contour = np.array(p.boundary.xy, dtype='float32').T - except: - contour = np.array(p.boundary[0].xy, dtype='float32').T - contour = np.round(contour.reshape(-1, 2) * 10) / 10 - - polygon_str = re.sub(r"[\[\]]", '', ",".join(map(str, contour))) - polygon_str = polygon_str.replace(". 
", ' ') - polygon_str = polygon_str.replace(".,", ',') - polygon_str = re.sub(r" {2,}", ' ', polygon_str) - polygon_str = re.sub(r" {0,}, {0,}", ',', polygon_str) - - for i, npy_file in enumerate(npy_list): - filename = npy_file.split('/')[-1].split('.')[0] - flag = footprints[j, i] - if flag: - fw.write( - "%s,%d,\"POLYGON ((%s))\"\n" % (filename, j, polygon_str)) - fw.close() - - -def check_ious(poly1, - area1, - center1, - poly2s, - area2s, - center2s, - thr, - center_thr, - eps=1e-5): - dis = np.linalg.norm(center2s - center1, axis=1) - r = (np.sqrt(area2s) + np.sqrt(area1)) / np.pi - locs = np.where(dis < center_thr * r)[0] - for idx in locs: - poly2 = poly2s[idx] - intersection = poly1.intersection(poly2).area - if intersection < eps: - continue - else: - union = poly1.union(poly2).area - iou_score = intersection / float(union) - if iou_score >= thr: - return False - return True - - -def filter_countours(polygons, filter_polygons, margin): - filter_valid_polygons = [] - filter_areas = [] - filter_centers = [] - for p in filter_polygons: - if margin: - p = p.buffer(np.sqrt(p.area) * margin) - if p.is_valid: - filter_valid_polygons.append(p) - filter_areas.append(p.area) - filter_centers.append(np.array(p.bounds).reshape(2, 2).sum(1)) - filter_areas = np.array(filter_areas) - filter_centers = np.array(filter_centers) - - filter_res = np.zeros(len(polygons), 'bool') - for idx, p in enumerate(polygons): - if p.is_valid: - area = p.area - center = np.array(p.bounds).reshape(2, 2).sum(1) - if check_ious( - p, - area, - center, - filter_valid_polygons, - filter_areas, - filter_centers, - thr=0, - center_thr=5): - filter_res[idx] = True - filter_idxs = np.where(filter_res)[0] - return filter_idxs - - -def process(npy_list, - thres1=0.1, - thres2_h1=0.6, - thres2_l1=0.4, - thres2_h2=0.6, - thres2_l2=0.35, - thres3_1=0.3, - thres3_s=0.45, - thres3_d=0.5, - thres3_i=0, - thres3_m=0.4, - margin=0, - distance=5, - min_area=25.5, - polygon_buffer=0): - npy_list = sorted(npy_list) - npy_sum = 0 - ignore_sum = 0 - npys = [] - for iii, npy_file in enumerate(npy_list): - npy = np.load(npy_file) - img_file = get_respond_img(npy_file.split('/')[-1]) - src_img = skimage.io.imread(img_file) - mask = src_img[:, :, 3] == 0 - mask = np.repeat(mask, 3, axis=0) - mask = np.repeat(mask, 3, axis=1) - assert mask.shape[0] == src_img.shape[0] * 3 and mask.shape[ - 1] == src_img.shape[1] * 3 - - npy = npy[:mask.shape[0], :mask.shape[1]] - npy[mask] = -10000 - npys.append(npy) - - ignore_mask = (npy > thres1) - npy_sum = npy_sum + npy * ignore_mask - ignore_sum = ignore_sum + ignore_mask - - npy_mean = npy_sum / np.maximum(ignore_sum, 1) - - npys = np.array(npys) - img_num = npys.shape[0] - ''' ============================ For Change Detection ============================ ''' - contours, polygons = get_building_polygon( - npy_mean, - thres2_h1, - thres2_l1, - distance=distance, - min_area=min_area, - polygon_buffer=polygon_buffer) - - building_num = len(contours) - - score_map = np.zeros((building_num, img_num)) - - footprints = np.zeros_like(score_map) - - changeids = [] - for i, contour in enumerate(contours): - rr, cc = get_idx_in_polygon(contour) - point_filter = (rr < npys.shape[2]) & (cc < npys.shape[1]) & ( - cc >= 0) & (rr >= 0) - rr = rr[point_filter] - cc = cc[point_filter] - - scores = np.mean(npys[:, cc, rr], axis=1) - score_mask = scores >= 0 - - score_filter = np.zeros(img_num, dtype='bool') - max_score = scores.max() - - masked_scores = scores[score_mask] - left_mean = np.cumsum(masked_scores) / ( - 
np.arange(len(masked_scores)) + 1) - right_mean = (np.cumsum(masked_scores[::-1]) / - (np.arange(len(masked_scores)) + 1))[::-1] - - max_diff = 0 - for idx in range(len(masked_scores) - 1): - diff = right_mean[idx + 1] - left_mean[idx] - max_diff = max(diff, max_diff) - if max_diff > thres3_d: - break - if max_diff > thres3_d: - changeids.append(i) - start = False - for idx, score in enumerate(scores): - if not start: - if idx == 0 and score > thres3_1: - score_filter[idx] = 1 - start = True - if score > thres3_s * max_score: - score_filter[idx] = 1 - start = True - else: - if score > thres3_i: - score_filter[idx] = 1 - - footprints[i] = score_filter - - changeids = np.array(changeids) - change_contours = [contours[idx] for idx in changeids] - change_polygons = [polygons[idx] for idx in changeids] - change_footprints = footprints[changeids] - - # print('num change footprints:', len(changeids), change_footprints.sum()) - ''' ============================ For Tracking ============================ ''' - contours, polygons = get_building_polygon( - npy_mean, - thres2_h2, - thres2_l2, - distance=distance, - min_area=min_area, - polygon_buffer=polygon_buffer) - # print('num track footprints (before filter):', len(contours)) - filter_idx = filter_countours(polygons, change_polygons, margin) - contours = [contours[idx] for idx in filter_idx] - # print('num track footprints (after filter):', len(contours)) - - building_num = len(contours) - score_map = np.zeros((building_num, img_num)) - - footprints = np.zeros_like(score_map) - for i, contour in enumerate(contours): - rr, cc = get_idx_in_polygon(contour) - point_filter = (rr < npys.shape[2]) & (cc < npys.shape[1]) & ( - cc >= 0) & (rr >= 0) - rr = rr[point_filter] - cc = cc[point_filter] - - scores = np.mean(npys[:, cc, rr], axis=1) - score_mask = scores >= 0 - - score_filter = np.zeros(img_num, dtype='bool') - max_score = scores.max() - - masked_scores = scores[score_mask] - left_mean = np.cumsum(masked_scores) / ( - np.arange(len(masked_scores)) + 1) - right_mean = (np.cumsum(masked_scores[::-1]) / - (np.arange(len(masked_scores)) + 1))[::-1] - - if scores[scores >= 0].mean() > thres3_m: - score_filter[scores >= 0] = 1 - - footprints[i] = score_filter - - final_contours = change_contours + contours - final_footprints = np.concatenate([change_footprints, footprints], 0) - save_as_csv(out_file, npy_list, final_contours, final_footprints, npys) - - -def main(): - dic = {} - npy_files = [os.path.join(pred_root, x) for x in os.listdir(pred_root)] - for npy_file in npy_files: - key = '_'.join(npy_file.split('/')[-1].split('.')[0].split('_')[5:]) - if key not in dic: - dic[key] = [npy_file] - else: - dic[key].append(npy_file) - - if os.path.isfile(out_file): - os.remove(out_file) - with open(out_file, 'w') as fw: - fw.write("filename,id,geometry\n") - - params = [] - for aoi, npy_list in dic.items(): - print("Process:", aoi) - params.append(npy_list) - - print("Execute!") - print("len params:", len(params)) - n_threads = 10 - pool = multiprocessing.Pool(n_threads) - _ = pool.map(process, params) - - for aoi in dic.keys(): - print('Merge:', aoi) - with open(out_file, 'a') as fw: - with open(out_file[:-4] + '_' + aoi + '.csv', 'r') as fr: - for line in fr.readlines(): - fw.write(line.strip() + '\n') - for aoi in dic.keys(): - try: - os.remove(out_file[:-4] + '_' + aoi + '.csv') - except: - pass - print("Finish!") - - -if __name__ == "__main__": - src_img_root = sys.argv[1] - pred_root = sys.argv[2] - out_file = sys.argv[3] - main() diff --git 
a/legacy/contrib/NeurIPS_SN7/pretrained_model/download_model.py b/legacy/contrib/NeurIPS_SN7/pretrained_model/download_model.py deleted file mode 100644 index 43f44509ce..0000000000 --- a/legacy/contrib/NeurIPS_SN7/pretrained_model/download_model.py +++ /dev/null @@ -1,107 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - -model_urls = { - # ImageNet Pretrained - "mobilenetv2-2-0_bn_imagenet": - "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar", - "mobilenetv2-1-5_bn_imagenet": - "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar", - "mobilenetv2-1-0_bn_imagenet": - "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar", - "mobilenetv2-0-5_bn_imagenet": - "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar", - "mobilenetv2-0-25_bn_imagenet": - "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar", - "xception41_imagenet": - "https://paddleseg.bj.bcebos.com/models/Xception41_pretrained.tgz", - "xception65_imagenet": - "https://paddleseg.bj.bcebos.com/models/Xception65_pretrained.tgz", - "hrnet_w18_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w18_imagenet.tar", - "hrnet_w30_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w30_imagenet.tar", - "hrnet_w32_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w32_imagenet.tar", - "hrnet_w40_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w40_imagenet.tar", - "hrnet_w44_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w44_imagenet.tar", - "hrnet_w48_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w48_imagenet.tar", - "hrnet_w64_bn_imagenet": - "https://paddleseg.bj.bcebos.com/models/hrnet_w64_imagenet.tar", - - # COCO pretrained - "deeplabv3p_mobilenetv2-1-0_bn_coco": - "https://paddleseg.bj.bcebos.com/deeplab_mobilenet_x1_0_coco.tgz", - "deeplabv3p_xception65_bn_coco": - "https://paddleseg.bj.bcebos.com/models/xception65_coco.tgz", - "unet_bn_coco": - "https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz", - "pspnet50_bn_coco": - "https://paddleseg.bj.bcebos.com/models/pspnet50_coco.tgz", - "pspnet101_bn_coco": - "https://paddleseg.bj.bcebos.com/models/pspnet101_coco.tgz", - - # Cityscapes pretrained - "deeplabv3p_mobilenetv2-1-0_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/mobilenet_cityscapes.tgz", - "deeplabv3p_xception65_gn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz", - "deeplabv3p_xception65_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/xception65_bn_cityscapes.tgz", - "unet_bn_coco": - 
"https://paddleseg.bj.bcebos.com/models/unet_coco_v3.tgz", - "icnet_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/icnet_cityscapes.tar.gz", - "pspnet50_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/pspnet50_cityscapes.tgz", - "pspnet101_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/pspnet101_cityscapes.tgz", - "hrnet_w18_bn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/hrnet_w18_bn_cityscapes.tgz", - "fast_scnn_cityscapes": - "https://paddleseg.bj.bcebos.com/models/fast_scnn_cityscape.tar", -} - -if __name__ == "__main__": - if len(sys.argv) != 2: - print("usage:\n python download_model.py ${MODEL_NAME}") - exit(1) - - model_name = sys.argv[1] - if not model_name in model_urls.keys(): - print("Only support: \n {}".format("\n ".join( - list(model_urls.keys())))) - exit(1) - - url = model_urls[model_name] - download_file_and_uncompress( - url=url, - savepath=LOCAL_PATH, - extrapath=LOCAL_PATH, - extraname=model_name) - - print("Pretrained Model download success!") diff --git a/legacy/contrib/NeurIPS_SN7/src/__init__.py b/legacy/contrib/NeurIPS_SN7/src/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_infer.py b/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_infer.py deleted file mode 100644 index de508d7a69..0000000000 --- a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_infer.py +++ /dev/null @@ -1,12 +0,0 @@ -import solaris as sol -import os -config_path = '../yml/sn7_baseline_infer.yml' -config = sol.utils.config.parse(config_path) -print('Config:') -print(config) - -# make infernce output dir -os.makedirs(os.path.dirname(config['inference']['output_dir']), exist_ok=True) - -inferer = sol.nets.infer.Inferer(config) -inferer() diff --git a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_postproc_funcs.py b/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_postproc_funcs.py deleted file mode 100644 index c56b14eb6c..0000000000 --- a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_postproc_funcs.py +++ /dev/null @@ -1,365 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Aug 25 14:11:02 2020 - -@author: avanetten -""" - -from shapely.ops import cascaded_union -import matplotlib.pyplot as plt -import geopandas as gpd -import multiprocessing -import pandas as pd -import numpy as np -import skimage.io -import tqdm -import glob -import math -import gdal -import time -import os - -import solaris as sol -from solaris.utils.core import _check_gdf_load -from solaris.raster.image import create_multiband_geotiff - - -def map_wrapper(x): - '''For multi-threading''' - return x[0](*(x[1:])) - - -def multithread_polys(param): - '''Simple wrapper around mask_to_poly_geojson() for multiprocessing - # https://solaris.readthedocs.io/en/latest/_modules/solaris/vector/mask.html#mask_to_poly_geojson - # mask_to_poly_geojson(pred_arr, channel_scaling=None, reference_im=None, - # output_path=None, output_type='geojson', min_area=40, - # bg_threshold=0, do_transform=None, simplify=False, - # tolerance=0.5, **kwargs) - ''' - - [ - pred_image, min_area, output_path_pred, output_type, bg_threshold, - simplify - ] = param - print("output_pred:", os.path.basename(output_path_pred)) - sol.vector.mask.mask_to_poly_geojson( - pred_image, - min_area=min_area, - output_path=output_path_pred, - output_type=output_type, - bg_threshold=bg_threshold, - simplify=simplify) - - -def calculate_iou(pred_poly, test_data_GDF): - """Get the best intersection over union for a predicted polygon. 
- Adapted from: https://github.com/CosmiQ/solaris/blob/master/solaris/eval/iou.py, but - keeps index of test_data_GDF - - Arguments - --------- - pred_poly : :py:class:`shapely.Polygon` - Prediction polygon to test. - test_data_GDF : :py:class:`geopandas.GeoDataFrame` - GeoDataFrame of ground truth polygons to test ``pred_poly`` against. - Returns - ------- - iou_GDF : :py:class:`geopandas.GeoDataFrame` - A subset of ``test_data_GDF`` that overlaps ``pred_poly`` with an added - column ``iou_score`` which indicates the intersection over union value. - """ - - # Fix bowties and self-intersections - if not pred_poly.is_valid: - pred_poly = pred_poly.buffer(0.0) - - precise_matches = test_data_GDF[test_data_GDF.intersects(pred_poly)] - - iou_row_list = [] - for idx, row in precise_matches.iterrows(): - # Load ground truth polygon and check exact iou - test_poly = row.geometry - # Ignore invalid polygons for now - if pred_poly.is_valid and test_poly.is_valid: - intersection = pred_poly.intersection(test_poly).area - union = pred_poly.union(test_poly).area - # Calculate iou - iou_score = intersection / float(union) - gt_idx = idx - else: - iou_score = 0 - gt_idx = -1 - row['iou_score'] = iou_score - row['gt_idx'] = gt_idx - iou_row_list.append(row) - - iou_GDF = gpd.GeoDataFrame(iou_row_list) - return iou_GDF - - -def track_footprint_identifiers(json_dir, - out_dir, - min_iou=0.25, - iou_field='iou_score', - id_field='Id', - reverse_order=False, - verbose=True, - super_verbose=False): - ''' - Track footprint identifiers in the deep time stack. - We need to track the global gdf instead of just the gdf of t-1. - ''' - - os.makedirs(out_dir, exist_ok=True) - - # set columns for master gdf - gdf_master_columns = [id_field, iou_field, 'area', 'geometry'] - - json_files = sorted([ - f for f in os.listdir(os.path.join(json_dir)) - if f.endswith('.geojson') and os.path.exists(os.path.join(json_dir, f)) - ]) - # start at the end and work backwards? 
- if reverse_order: - json_files = json_files[::-1] - - # check if only partical matching has been done (this will cause errors) - out_files_tmp = sorted( - [z for z in os.listdir(out_dir) if z.endswith('.geojson')]) - if len(out_files_tmp) > 0: - if len(out_files_tmp) != len(json_files): - raise Exception( - "\nError in:", out_dir, "with N =", len(out_files_tmp), - "files, need to purge this folder and restart matching!\n") - return - elif len(out_files_tmp) == len(json_files): - print("\nDir:", os.path.basename(out_dir), "N files:", - len(json_files), "directory matching completed, skipping...") - return - else: - print("\nMatching json_dir: ", os.path.basename(json_dir), "N json:", - len(json_files)) - - gdf_dict = {} - for j, f in enumerate(json_files): - - name_root = f.split('.')[0] - json_path = os.path.join(json_dir, f) - output_path = os.path.join(out_dir, f) - - if verbose and ((j % 1) == 0): - print(" ", j, "/", len(json_files), "for", - os.path.basename(json_dir), "=", name_root) - - # gdf - gdf_now = gpd.read_file(json_path) - # drop value if it exists - gdf_now = gdf_now.drop(columns=['value']) - # get area - gdf_now['area'] = gdf_now['geometry'].area - # initialize iou, id - gdf_now[iou_field] = -1 - gdf_now[id_field] = -1 - # sort by reverse area - gdf_now.sort_values(by=['area'], ascending=False, inplace=True) - gdf_now = gdf_now.reset_index(drop=True) - # reorder columns (if needed) - gdf_now = gdf_now[gdf_master_columns] - id_set = set([]) - - if verbose: - print("\n") - print("", j, "file_name:", f) - print(" ", "gdf_now.columns:", gdf_now.columns) - - if j == 0: - # Establish initial footprints at Epoch0 - # set id - gdf_now[id_field] = gdf_now.index.values - gdf_now[iou_field] = 0 - n_new = len(gdf_now) - n_matched = 0 - id_set = set(gdf_now[id_field].values) - gdf_master_Out = gdf_now.copy(deep=True) - # gdf_dict[f] = gdf_now - else: - # match buildings in epochT to epochT-1 - # see: https://github.com/CosmiQ/solaris/blob/master/solaris/eval/base.py - # print("gdf_master;", gdf_dict['master']) #gdf_master) - gdf_master_Out = gdf_dict['master'].copy(deep=True) - gdf_master_Edit = gdf_dict['master'].copy(deep=True) - - if verbose: - print(" len gdf_now:", len(gdf_now), "len(gdf_master):", - len(gdf_master_Out), "max master id:", - np.max(gdf_master_Out[id_field])) - print(" gdf_master_Edit.columns:", gdf_master_Edit.columns) - - new_id = np.max(gdf_master_Edit[id_field]) + 1 - # if verbose: - # print("new_id:", new_id) - idx = 0 - n_new = 0 - n_matched = 0 - for pred_idx, pred_row in gdf_now.iterrows(): - if verbose: - if (idx % 1000) == 0: - print(" ", name_root, idx, "/", len(gdf_now)) - if super_verbose: - # print(" ", i, j, idx, "/", len(gdf_now)) - print(" ", idx, "/", len(gdf_now)) - idx += 1 - pred_poly = pred_row.geometry - # if super_verbose: - # print(" pred_poly.exterior.coords:", list(pred_poly.exterior.coords)) - - # get iou overlap - iou_GDF = calculate_iou(pred_poly, gdf_master_Edit) - # iou_GDF = iou.calculate_iou(pred_poly, gdf_master_Edit) - # print("iou_GDF:", iou_GDF) - - # Get max iou - if not iou_GDF.empty: - max_iou_row = iou_GDF.loc[iou_GDF['iou_score'].idxmax( - axis=0, skipna=True)] - # sometimes we are get an erroneous id of 0, caused by nan area, - # so check for this - max_area = max_iou_row.geometry.area - if max_area == 0 or math.isnan(max_area): - # print("nan area!", max_iou_row, "returning...") - raise Exception("\n Nan area!:", max_iou_row, - "returning...") - return - - id_match = max_iou_row[id_field] - if id_match in id_set: - 
print("Already seen id! returning...") - raise Exception("\n Already seen id!", id_match, - "returning...") - return - - # print("iou_GDF:", iou_GDF) - if max_iou_row['iou_score'] >= min_iou: - if super_verbose: - print(" pred_idx:", pred_idx, "match_id:", - max_iou_row[id_field], "max iou:", - max_iou_row['iou_score']) - # we have a successful match, so set iou, and id - gdf_now.loc[pred_row. - name, iou_field] = max_iou_row['iou_score'] - gdf_now.loc[pred_row.name, id_field] = id_match - # drop matched polygon in ground truth - gdf_master_Edit = gdf_master_Edit.drop( - max_iou_row.name, axis=0) - n_matched += 1 - # # update gdf_master geometry? - # # Actually let's leave the geometry the same so it doesn't move around... - # gdf_master_Out.at[max_iou_row['gt_idx'], 'geometry'] = pred_poly - # gdf_master_Out.at[max_iou_row['gt_idx'], 'area'] = pred_poly.area - # gdf_master_Out.at[max_iou_row['gt_idx'], iou_field] = max_iou_row['iou_score'] - - else: - # no match, - if super_verbose: - print(" Minimal match! - pred_idx:", pred_idx, - "match_id:", max_iou_row[id_field], - "max iou:", max_iou_row['iou_score']) - print(" Using new id:", new_id) - if (new_id in id_set) or (new_id == 0): - raise Exception( - "trying to add an id that already exists, returning!" - ) - return - gdf_now.loc[pred_row.name, iou_field] = 0 - gdf_now.loc[pred_row.name, id_field] = new_id - id_set.add(new_id) - # update master, cols = [id_field, iou_field, 'area', 'geometry'] - gdf_master_Out.loc[new_id] = [ - new_id, 0, pred_poly.area, pred_poly - ] - new_id += 1 - n_new += 1 - - else: - # no match (same exact code as right above) - if super_verbose: - print(" pred_idx:", pred_idx, "no overlap, new_id:", - new_id) - if (new_id in id_set) or (new_id == 0): - raise Exception( - "trying to add an id that already exists, returning!" - ) - return - gdf_now.loc[pred_row.name, iou_field] = 0 - gdf_now.loc[pred_row.name, id_field] = new_id - id_set.add(new_id) - # update master, cols = [id_field, iou_field, 'area', 'geometry'] - gdf_master_Out.loc[new_id] = [ - new_id, 0, pred_poly.area, pred_poly - ] - new_id += 1 - n_new += 1 - - # print("gdf_now:", gdf_now) - gdf_dict[f] = gdf_now - gdf_dict['master'] = gdf_master_Out - - # save! - if len(gdf_now) > 0: - gdf_now.to_file(output_path, driver="GeoJSON") - else: - print("Empty dataframe, writing empty gdf", output_path) - open(output_path, 'a').close() - - if verbose: - print(" ", "N_new, N_matched:", n_new, n_matched) - - return - - -def sn7_convert_geojsons_to_csv(json_dirs, - output_csv_path, - population='proposal'): - ''' - Convert jsons to csv - Population is either "ground" or "proposal" - ''' - - first_file = True # switch that will be turned off once we process the first file - for json_dir in tqdm.tqdm(json_dirs): - json_files = sorted(glob.glob(os.path.join(json_dir, '*.geojson'))) - for json_file in tqdm.tqdm(json_files): - try: - df = gpd.read_file(json_file) - except: - message = '! Invalid dataframe for %s' % json_file - print(message) - continue - #raise Exception(message) - if population == 'ground': - file_name_col = df.image_fname.apply(lambda x: os.path.splitext( - x)[0]) - elif population == 'proposal': - file_name_col = os.path.splitext(os.path.basename(json_file))[0] - else: - raise Exception('! Invalid population') - df = gpd.GeoDataFrame({ - 'filename': file_name_col, - 'id': df.Id.astype(int), - 'geometry': df.geometry, - }) - if len(df) == 0: - message = '! 
Empty dataframe for %s' % json_file - print(message) - #raise Exception(message) - - if first_file: - net_df = df - first_file = False - else: - net_df = net_df.append(df) - - net_df.to_csv(output_csv_path, index=False) - return net_df diff --git a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_prep_funcs.py b/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_prep_funcs.py deleted file mode 100644 index 34943fd503..0000000000 --- a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_prep_funcs.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Aug 25 14:11:02 2020 - -@author: avanetten -""" - -import multiprocessing -import pandas as pd -import numpy as np -import skimage -import gdal -import os - -import solaris as sol -from solaris.raster.image import create_multiband_geotiff -from solaris.utils.core import _check_gdf_load - - -def map_wrapper(x): - '''For multi-threading''' - return x[0](*(x[1:])) - - -def make_geojsons_and_masks(name_root, - image_path, - json_path, - output_path_mask, - output_path_mask_fbc=None): - ''' - Make the stuffins - mask_fbc is an (optional) three-channel fbc (footbrint, boundary, contact) mask - ''' - - print(" name_root:", name_root) - - # filter out null geoms (this is always a worthy check) - gdf_tmp = _check_gdf_load(json_path) - if len(gdf_tmp) == 0: - gdf_nonull = gdf_tmp - else: - gdf_nonull = gdf_tmp[gdf_tmp.geometry.notnull()] - try: - im_tmp = skimage.io.imread(image_path) - except: - print("Error loading image %s, skipping..." % (image_path)) - return - - # handle empty geojsons - if len(gdf_nonull) == 0: - # create masks - # mask 1 has 1 channel - # mask_fbc has 3 channel - print(" Empty labels for name_root!", name_root) - im = gdal.Open(image_path) - proj = im.GetProjection() - geo = im.GetGeoTransform() - im = im.ReadAsArray() - # set masks to 0 everywhere - mask_arr = np.zeros((1, im.shape[1], im.shape[2])) - create_multiband_geotiff(mask_arr, output_path_mask, proj, geo) - if output_path_mask_fbc: - mask_arr = np.zeros((3, im.shape[1], im.shape[2])) - create_multiband_geotiff(mask_arr, output_path_mask_fbc, proj, geo) - return - - # make masks (single channel) - # https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb - f_mask = sol.vector.mask.df_to_px_mask( - df=gdf_nonull, - out_file=output_path_mask, - channels=['footprint'], - reference_im=image_path, - shape=(im_tmp.shape[0], im_tmp.shape[1])) - - # three channel mask (takes awhile) - # https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb - if output_path_mask_fbc: - fbc_mask = sol.vector.mask.df_to_px_mask( - df=gdf_nonull, - out_file=output_path_mask_fbc, - channels=['footprint', 'boundary', 'contact'], - reference_im=image_path, - boundary_width=5, - contact_spacing=10, - meters=True, - shape=(im_tmp.shape[0], im_tmp.shape[1])) - - return diff --git a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_train.py b/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_train.py deleted file mode 100644 index 773cf5e9ee..0000000000 --- a/legacy/contrib/NeurIPS_SN7/src/sn7_baseline_train.py +++ /dev/null @@ -1,13 +0,0 @@ -import solaris as sol -import os -config_path = '../yml/sn7_baseline_train.yml' -config = sol.utils.config.parse(config_path) -print('Config:') -print(config) - -# make model output dir -os.makedirs( - os.path.dirname(config['training']['model_dest_path']), exist_ok=True) - -trainer = sol.nets.train.Trainer(config=config) -trainer.train() diff --git 
a/legacy/contrib/NeurIPS_SN7/test.sh b/legacy/contrib/NeurIPS_SN7/test.sh deleted file mode 100644 index f2fbc79725..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test.sh +++ /dev/null @@ -1,22 +0,0 @@ -source activate solaris -test_data_path=$1 -output_path=$2 - -if [ ! -d /wdata/saved_model/hrnet/best_model ]; then - bash download.sh -fi - -rm -r /wdata/test -cp -r $test_data_path /wdata/test -rm /wdata/test/* - -python tools.py /wdata/test test -cp dummy.tif /wdata/test - -python pdseg/eval.py --use_gpu --vis --vis_dir vis/test_org --cfg hrnet_sn7.yaml DATASET.DATA_DIR /wdata/test DATASET.VAL_FILE_LIST test_list.txt VIS.VISINEVAL True TEST.TEST_AUG True - -python tools.py vis/test_org compose - -python postprocess.py /wdata/test vis/test_org_compose "$output_path" - -rm -r vis diff --git a/legacy/contrib/NeurIPS_SN7/test/ci/check_code_style.sh b/legacy/contrib/NeurIPS_SN7/test/ci/check_code_style.sh deleted file mode 100644 index 03db3711c4..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/ci/check_code_style.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -function abort(){ - echo "Your change doesn't follow PaddleSeg's code style." 1>&2 - echo "Please use pre-commit to check what is wrong." 1>&2 - exit 1 -} - -trap 'abort' 0 -set -e - -cd $TRAVIS_BUILD_DIR -export PATH=/usr/bin:$PATH -pre-commit install - -if ! pre-commit run -a ; then - git diff - exit 1 -fi - -trap : 0 diff --git a/legacy/contrib/NeurIPS_SN7/test/ci/test_download_dataset.sh b/legacy/contrib/NeurIPS_SN7/test/ci/test_download_dataset.sh deleted file mode 100644 index 168874d07f..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/ci/test_download_dataset.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -set -o errexit - -base_path=$(cd `dirname $0`/../..; pwd) -cd $base_path - -python dataset/download_pet.py diff --git a/legacy/contrib/NeurIPS_SN7/test/configs/deeplabv3p_xception65_cityscapes.yaml b/legacy/contrib/NeurIPS_SN7/test/configs/deeplabv3p_xception65_cityscapes.yaml deleted file mode 100644 index d28ffe980f..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/configs/deeplabv3p_xception65_cityscapes.yaml +++ /dev/null @@ -1,43 +0,0 @@ -EVAL_CROP_SIZE: (2049, 1025) # (width, height), for unpadding rangescaling and stepscaling -TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - MAX_SCALE_FACTOR: 2.0 # for stepscaling - MIN_SCALE_FACTOR: 0.5 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/cityscapes/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 19 - TEST_FILE_LIST: "dataset/cityscapes/val.list" - TRAIN_FILE_LIST: "dataset/cityscapes/train.list" - VAL_FILE_LIST: "dataset/cityscapes/val.list" - VIS_FILE_LIST: "dataset/cityscapes/vis.list" - SEPARATOR: " " - IGNORE_INDEX: 255 -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - DEFAULT_NORM_TYPE: "gn" - MODEL_NAME: "deeplabv3p" - DEEPLAB: - ASPP_WITH_SEP_CONV: True - DECODER_USE_SEP_CONV: True -TEST: - TEST_MODEL: "./saved_model/cityscape_v5/final/" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/cityscape_v5/" - PRETRAINED_MODEL_DIR: "pretrained_model/deeplabv3plus_gn_init" - SNAPSHOT_EPOCH: 10 -SOLVER: - LR: 0.001 - LR_POLICY: "poly" - OPTIMIZER: "sgd" - 
NUM_EPOCHS: 700 diff --git a/legacy/contrib/NeurIPS_SN7/test/configs/unet_pet.yaml b/legacy/contrib/NeurIPS_SN7/test/configs/unet_pet.yaml deleted file mode 100644 index c853d44532..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/configs/unet_pet.yaml +++ /dev/null @@ -1,42 +0,0 @@ -TRAIN_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -EVAL_CROP_SIZE: (512, 512) # (width, height), for unpadding rangescaling and stepscaling -AUG: - AUG_METHOD: "unpadding" # choice unpadding rangescaling and stepscaling - FIX_RESIZE_SIZE: (512, 512) # (width, height), for unpadding - - INF_RESIZE_VALUE: 500 # for rangescaling - MAX_RESIZE_VALUE: 600 # for rangescaling - MIN_RESIZE_VALUE: 400 # for rangescaling - - MAX_SCALE_FACTOR: 1.25 # for stepscaling - MIN_SCALE_FACTOR: 0.75 # for stepscaling - SCALE_STEP_SIZE: 0.25 # for stepscaling - MIRROR: True -BATCH_SIZE: 4 -DATASET: - DATA_DIR: "./dataset/mini_pet/" - IMAGE_TYPE: "rgb" # choice rgb or rgba - NUM_CLASSES: 3 - TEST_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - TRAIN_FILE_LIST: "./dataset/mini_pet/file_list/train_list.txt" - VAL_FILE_LIST: "./dataset/mini_pet/file_list/val_list.txt" - VIS_FILE_LIST: "./dataset/mini_pet/file_list/test_list.txt" - IGNORE_INDEX: 255 - SEPARATOR: " " -FREEZE: - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" -MODEL: - MODEL_NAME: "unet" - DEFAULT_NORM_TYPE: "bn" -TEST: - TEST_MODEL: "./saved_model/unet_pet/final/" -TRAIN: - MODEL_SAVE_DIR: "./saved_model/unet_pet/" - PRETRAINED_MODEL_DIR: "./test/models/unet_coco_init/" - SNAPSHOT_EPOCH: 10 -SOLVER: - NUM_EPOCHS: 100 - LR: 0.005 - LR_POLICY: "poly" - OPTIMIZER: "adam" diff --git a/legacy/contrib/NeurIPS_SN7/test/local_test_cityscapes.py b/legacy/contrib/NeurIPS_SN7/test/local_test_cityscapes.py deleted file mode 100644 index a4355c6f7d..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/local_test_cityscapes.py +++ /dev/null @@ -1,83 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from test_utils import download_file_and_uncompress, train, eval, vis, export_model -import os -import argparse - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -DATASET_PATH = os.path.join(LOCAL_PATH, "..", "dataset") -MODEL_PATH = os.path.join(LOCAL_PATH, "models") - - -def download_cityscapes_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -def download_deeplabv3p_xception65_cityscapes_model(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/models/deeplabv3p_xception65_cityscapes.tgz" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_cityscapes_dataset(".", DATASET_PATH) - download_deeplabv3p_xception65_cityscapes_model(".", MODEL_PATH) - - model_name = "deeplabv3p_xception65_cityscapes" - test_model = os.path.join(LOCAL_PATH, "models", model_name) - cfg = os.path.join(LOCAL_PATH, "configs", "{}.yaml".format(model_name)) - freeze_save_dir = os.path.join(LOCAL_PATH, "inference_model", model_name) - vis_dir = os.path.join(LOCAL_PATH, "visual", model_name) - saved_model = os.path.join(LOCAL_PATH, "saved_model", model_name) - - parser = argparse.ArgumentParser(description="PaddleSeg loacl test") - parser.add_argument( - "--devices", - dest="devices", - help="GPU id of running. if more than one, use spacing to separate.", - nargs="+", - default=[0], - type=int) - args = parser.parse_args() - - devices = [str(x) for x in args.devices] - - export_model( - flags=["--cfg", cfg], - options=[ - "TEST.TEST_MODEL", test_model, "FREEZE.SAVE_DIR", freeze_save_dir - ], - devices=devices) - - # Final eval results should be #image=500 acc=0.9615 IoU=0.7804 - eval( - flags=["--cfg", cfg, "--use_gpu"], - options=["TEST.TEST_MODEL", test_model], - devices=devices) - - vis(flags=["--cfg", cfg, "--use_gpu", "--local_test", "--vis_dir", vis_dir], - options=["TEST.TEST_MODEL", test_model], - devices=devices) - - train( - flags=["--cfg", cfg, "--use_gpu", "--log_steps", "10"], - options=[ - "SOLVER.NUM_EPOCHS", "1", "TRAIN.PRETRAINED_MODEL_DIR", test_model, - "TRAIN.MODEL_SAVE_DIR", saved_model - ], - devices=devices) diff --git a/legacy/contrib/NeurIPS_SN7/test/local_test_pet.py b/legacy/contrib/NeurIPS_SN7/test/local_test_pet.py deleted file mode 100644 index f55942532f..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/local_test_pet.py +++ /dev/null @@ -1,104 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from test_utils import download_file_and_uncompress, train, eval, vis, export_model -import os -import argparse - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -DATASET_PATH = os.path.join(LOCAL_PATH, "..", "dataset") -MODEL_PATH = os.path.join(LOCAL_PATH, "models") - - -def download_pet_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/mini_pet.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -def download_unet_coco_model(savepath, extrapath): - url = "https://bj.bcebos.com/v1/paddleseg/models/unet_coco_init.tgz" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_pet_dataset(LOCAL_PATH, DATASET_PATH) - download_unet_coco_model(LOCAL_PATH, MODEL_PATH) - - model_name = "unet_pet" - test_model = os.path.join(LOCAL_PATH, "models", "unet_coco_init") - cfg = os.path.join(LOCAL_PATH, "..", "configs", - "{}.yaml".format(model_name)) - freeze_save_dir = os.path.join(LOCAL_PATH, "inference_model", model_name) - vis_dir = os.path.join(LOCAL_PATH, "visual", model_name) - saved_model = os.path.join(LOCAL_PATH, "saved_model", model_name) - - parser = argparse.ArgumentParser(description="PaddleSeg loacl test") - parser.add_argument( - "--devices", - dest="devices", - help="GPU id of running. if more than one, use spacing to separate.", - nargs="+", - default=[0], - type=int) - args = parser.parse_args() - - devices = [str(x) for x in args.devices] - - train( - flags=["--cfg", cfg, "--use_gpu", "--log_steps", "10"], - options=[ - "SOLVER.NUM_EPOCHS", "1", "TRAIN.PRETRAINED_MODEL_DIR", test_model, - "TRAIN.MODEL_SAVE_DIR", saved_model, "DATASET.TRAIN_FILE_LIST", - os.path.join(DATASET_PATH, "mini_pet", "file_list", - "train_list.txt"), "DATASET.VAL_FILE_LIST", - os.path.join(DATASET_PATH, "mini_pet", "file_list", - "val_list.txt"), "DATASET.TEST_FILE_LIST", - os.path.join(DATASET_PATH, "mini_pet", "file_list", - "test_list.txt"), "DATASET.DATA_DIR", - os.path.join(DATASET_PATH, "mini_pet"), "BATCH_SIZE", "1" - ], - devices=devices) - - eval( - flags=["--cfg", cfg, "--use_gpu"], - options=[ - "TEST.TEST_MODEL", - os.path.join(saved_model, "final"), "DATASET.VAL_FILE_LIST", - os.path.join(DATASET_PATH, "mini_pet", "file_list", "val_list.txt"), - "DATASET.DATA_DIR", - os.path.join(DATASET_PATH, "mini_pet") - ], - devices=devices) - - vis(flags=["--cfg", cfg, "--use_gpu", "--local_test", "--vis_dir", vis_dir], - options=[ - "DATASET.TEST_FILE_LIST", - os.path.join(DATASET_PATH, "mini_pet", "file_list", - "test_list.txt"), "DATASET.DATA_DIR", - os.path.join(DATASET_PATH, "mini_pet"), "TEST.TEST_MODEL", - os.path.join(saved_model, "final") - ], - devices=devices) - - export_model( - flags=["--cfg", cfg], - options=[ - "TEST.TEST_MODEL", - os.path.join(saved_model, "final"), "FREEZE.SAVE_DIR", - freeze_save_dir - ], - devices=devices) diff --git a/legacy/contrib/NeurIPS_SN7/test/test_utils.py b/legacy/contrib/NeurIPS_SN7/test/test_utils.py deleted file mode 100644 index 3aa75502f5..0000000000 --- a/legacy/contrib/NeurIPS_SN7/test/test_utils.py +++ /dev/null @@ -1,193 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import time -import shutil -import requests -import sys -import tarfile -import zipfile -import platform -import functools - -lasttime = time.time() -FLUSH_INTERVAL = 0.1 - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -PDSEG_PATH = os.path.join(LOCAL_PATH, "..", "pdseg") - - -def get_platform(): - return platform.platform() - - -def is_windows(): - return get_platform().lower().startswith("windows") - - -def progress(str, end=False): - global lasttime - if end: - str += "\n" - lasttime = 0 - if time.time() - lasttime >= FLUSH_INTERVAL: - sys.stdout.write("\r%s" % str) - lasttime = time.time() - sys.stdout.flush() - - -def _download_file(url, savepath, print_progress): - r = requests.get(url, stream=True) - total_length = r.headers.get('content-length') - - if total_length is None: - with open(savepath, 'wb') as f: - shutil.copyfileobj(r.raw, f) - else: - with open(savepath, 'wb') as f: - dl = 0 - total_length = int(total_length) - starttime = time.time() - if print_progress: - print("Downloading %s" % os.path.basename(savepath)) - for data in r.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if print_progress: - done = int(50 * dl / total_length) - progress("[%-50s] %.2f%%" % - ('=' * done, float(100 * dl) / total_length)) - if print_progress: - progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) - - -def _uncompress_file(filepath, extrapath, delete_file, print_progress): - if print_progress: - print("Uncompress %s" % os.path.basename(filepath)) - - if filepath.endswith("zip"): - handler = _uncompress_file_zip - elif filepath.endswith("tgz"): - handler = _uncompress_file_tar - else: - handler = functools.partial(_uncompress_file_tar, mode="r") - - for total_num, index, rootpath in handler(filepath, extrapath): - if print_progress: - done = int(50 * float(index) / total_num) - progress( - "[%-50s] %.2f%%" % ('=' * done, float(100 * index) / total_num)) - if print_progress: - progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) - - if delete_file: - os.remove(filepath) - - return rootpath - - -def _uncompress_file_zip(filepath, extrapath): - files = zipfile.ZipFile(filepath, 'r') - filelist = files.namelist() - rootpath = filelist[0] - total_num = len(filelist) - for index, file in enumerate(filelist): - files.extract(file, extrapath) - yield total_num, index, rootpath - files.close() - yield total_num, index, rootpath - - -def _uncompress_file_tar(filepath, extrapath, mode="r:gz"): - files = tarfile.open(filepath, mode) - filelist = files.getnames() - total_num = len(filelist) - rootpath = filelist[0] - for index, file in enumerate(filelist): - files.extract(file, extrapath) - yield total_num, index, rootpath - files.close() - yield total_num, index, rootpath - - -def download_file_and_uncompress(url, - savepath=None, - extrapath=None, - extraname=None, - print_progress=True, - cover=False, - delete_file=True): - if savepath is None: - savepath = "." - - if extrapath is None: - extrapath = "." 
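    # Work out the local archive path and the directory it should unpack to;
    # with cover=True any stale copies are removed first, and the download /
    # uncompress / move steps below are skipped for whatever already exists
    # on disk.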
- - savename = url.split("/")[-1] - savepath = os.path.join(savepath, savename) - savename = ".".join(savename.split(".")[:-1]) - savename = os.path.join(extrapath, savename) - extraname = savename if extraname is None else os.path.join( - extrapath, extraname) - - if cover: - if os.path.exists(savepath): - shutil.rmtree(savepath) - if os.path.exists(savename): - shutil.rmtree(savename) - if os.path.exists(extraname): - shutil.rmtree(extraname) - - if not os.path.exists(extraname): - if not os.path.exists(savename): - if not os.path.exists(savepath): - _download_file(url, savepath, print_progress) - savename = _uncompress_file(savepath, extrapath, delete_file, - print_progress) - savename = os.path.join(extrapath, savename) - shutil.move(savename, extraname) - - -def _pdseg(command, flags, options, devices): - script = "{}{}{}.py".format(PDSEG_PATH, os.sep, command) - flags = " ".join(flags) - options = " ".join(options) - if is_windows(): - set_cuda_command = "set CUDA_VISIBLE_DEVICES={}".format( - ",".join(devices)) - else: - set_cuda_command = "export CUDA_VISIBLE_DEVICES={}".format( - ",".join(devices)) - cmd = "{} && python {} {} {}".format(set_cuda_command, script, flags, - options) - print(cmd) - os.system(cmd) - - -def train(flags, options, devices): - _pdseg("train", flags, options, devices) - - -def eval(flags, options, devices): - _pdseg("eval", flags, options, devices) - - -def vis(flags, options, devices): - _pdseg("vis", flags, options, devices) - - -def export_model(flags, options, devices): - _pdseg("export_model", flags, options, devices) diff --git a/legacy/contrib/NeurIPS_SN7/test_list.txt b/legacy/contrib/NeurIPS_SN7/test_list.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/NeurIPS_SN7/tools.py b/legacy/contrib/NeurIPS_SN7/tools.py deleted file mode 100644 index 6af25ab5f9..0000000000 --- a/legacy/contrib/NeurIPS_SN7/tools.py +++ /dev/null @@ -1,19 +0,0 @@ -import sys - -import data_lib as dlib - -path = sys.argv[1] -mode = sys.argv[2] - -if mode == "train": - dlib.create_label(path, f3x=False) - dlib.enlarge_3x(path) - dlib.create_label(path, f3x=True) - dlib.divide(path) - dlib.create_trainval_list(path) -elif mode == "test": - dlib.enlarge_3x(path) - dlib.divide(path) - dlib.create_test_list(path) -else: - dlib.compose(path) diff --git a/legacy/contrib/NeurIPS_SN7/train.sh b/legacy/contrib/NeurIPS_SN7/train.sh deleted file mode 100644 index bc9c121680..0000000000 --- a/legacy/contrib/NeurIPS_SN7/train.sh +++ /dev/null @@ -1,16 +0,0 @@ -source activate solaris -train_data_path=$1 - -rm -r /wdata/saved_model/hrnet/best_model/ - -rm -r /wdata/train -cp -r $train_data_path /wdata/train -rm /wdata/train/* - -python tools.py /wdata/train train 2>err.log - -cd pretrained_model -python download_model.py hrnet_w48_bn_imagenet -cd .. 
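# Train the SN7 HRNet model on the prepared /wdata/train data, with in-training
# evaluation enabled (--do_eval) and val_list.txt supplied as the test file list.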
- -python pdseg/train.py --do_eval --use_gpu --cfg hrnet_sn7.yaml DATASET.DATA_DIR /wdata/train DATASET.TEST_FILE_LIST val_list.txt diff --git a/legacy/contrib/NeurIPS_SN7/train_list.txt b/legacy/contrib/NeurIPS_SN7/train_list.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/NeurIPS_SN7/val_list.txt b/legacy/contrib/NeurIPS_SN7/val_list.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/README.md b/legacy/contrib/README.md deleted file mode 100644 index fade4d0194..0000000000 --- a/legacy/contrib/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# PaddleSeg 产业实践 - -提供基于PaddlSeg最新的分割特色模型: - -- [人像分割](./HumanSeg) -- [遥感分割](./RemoteSensing) -- [人体解析](./ACE2P) -- [车道线分割](./LaneNet) -- [工业表盘分割](#工业表盘分割) -- [AIStudio在线教程](#AIStudio在线教程) - -## 人像分割 HumanSeg - -HumanSeg系列全新升级,提供三个适用于不同场景,包含适用于移动端实时分割场景的模型`HumanSeg-lite`,提供了包含光流的后处理的优化,使人像分割在视频场景中更加顺畅,更多详情请参考[HumanSeg](./HumanSeg) - -## 遥感分割 Remote Sensing Segmentation -PaddleSeg遥感影像分割涵盖图像预处理、数据增强、模型训练、预测流程。 -针对遥感数据多通道、分布范围大、分布不均的特点,我们支持多通道训练预测,内置10+多通道预处理和数据增强的策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 -内置U-Net, HRNet两种主流分割网络,可选择不同的损失函数如Dice Loss, BCE Loss等方式强化小目标和不均衡样本场景下的分割精度。更多详情请参考[RemoteSensing](./RemoteSensing) - -以下是遥感云检测的示例效果: - -![](./RemoteSensing/docs/imgs/vis.png) - -## 人体解析 Human Parsing - -人体解析(Human Parsing)是细粒度的语义分割任务,旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。ACE2P通过融合底层特征、全局上下文信息和边缘细节,端到端训练学习人体解析任务。以ACE2P单人人体解析网络为基础的解决方案在CVPR2019第三届LIP挑战赛中赢得了全部三个人体解析任务的第一名 - - -#### ACE2P模型框架图 -![](./ACE2P/imgs/net.jpg) - -PaddleSeg提供了ACE2P获得比赛冠军的预训练模型,更多详情请点击[ACE2P](./ACE2P) - -## 车道线分割 LaneNet - -PaddleSeg提供了基于LaneNet的车道线分割模型,更多详情请点击[LaneNet](./LaneNet) - -![](https://pic2.zhimg.com/80/v2-8015f4b256791d4456fbc2739efc106d_1440w.jpg) - - -## 工业表盘分割 - - -**Note:** 本章节所有命令均在`PaddleSeg`目录下执行。 - -### 1. 模型结构 - -U-Net - -### 2. 数据准备 - -执行以下命令下载并解压数据集,数据集将存放在contrib/MechanicalIndustryMeter文件夹下: - -``` -python ./contrib/MechanicalIndustryMeter/download_mini_mechanical_industry_meter.py -``` - -### 3. 下载预训练模型 - -``` -python ./pretrained_model/download_model.py unet_bn_coco -``` - -### 4. 训练与评估 - -``` -export CUDA_VISIBLE_DEVICES=0 -python ./pdseg/train.py --log_steps 10 --cfg contrib/MechanicalIndustryMeter/unet_mechanical_meter.yaml --use_gpu --do_eval --use_mpio -``` - -### 5. 可视化 -我们已提供了一个训练好的模型,执行以下命令进行下载,下载后将存放在./contrib/MechanicalIndustryMeter/文件夹下。 - -``` -python ./contrib/MechanicalIndustryMeter/download_unet_mechanical_industry_meter.py -``` - -使用该模型进行预测可视化: - -``` -python ./pdseg/vis.py --cfg contrib/MechanicalIndustryMeter/unet_mechanical_meter.yaml --use_gpu --vis_dir vis_meter \ -TEST.TEST_MODEL "./contrib/MechanicalIndustryMeter/unet_mechanical_industry_meter/" -``` -可视化结果会保存在./vis_meter文件夹下。 - -### 6. 
可视化结果示例: - - 原图: - - ![](MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.JPG) - - 预测结果: - - ![](MechanicalIndustryMeter/imgs/1560143028.5_IMG_3091.png) - -## AIStudio在线教程 - -PaddleSeg在AI Studio平台上提供了在线体验的教程,欢迎体验: - -|教程|链接| -|-|-| -|工业质检|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/184392)| -|人像分割|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/475345)| -|特色垂类模型|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/226710)| diff --git a/legacy/contrib/RemoteSensing/README.md b/legacy/contrib/RemoteSensing/README.md deleted file mode 100644 index ed0654a5f7..0000000000 --- a/legacy/contrib/RemoteSensing/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# PaddleSeg遥感影像分割 -遥感影像分割是图像分割领域中的重要应用场景,广泛应用于土地测绘、环境监测、城市建设等领域。遥感影像分割的目标多种多样,有诸如积雪、农作物、道路、建筑、水源等地物目标,也有例如云层的空中目标。 - -PaddleSeg遥感影像分割涵盖数据分析、预处理、数据增强、模型训练、预测等流程,帮助用户利用深度学习技术解决遥感影像分割问题。 - -## 特点 -- 针对遥感影像多通道、标注数据稀少的特点,我们支持多通道训练预测,内置10+多通道预处理和数据增强的策略,可结合实际业务场景进行定制组合,提升模型泛化能力和鲁棒性。 - -- 针对遥感影像分布范围广、分布不均的特点,我们提供数据分析工具,帮助深入了解数据组成、优化模型训练效果。为确保正常训练,我们提供数据校验工具,帮助排查数据问题。 - -- 内置U-Net, HRNet两种主流分割网络,可选择不同的损失函数如Dice Loss, BCE Loss等方式强化小目标和不均衡样本场景下的分割精度。 - -## 前置依赖 -**Note:** 若没有特殊说明,以下所有命令需要在`PaddleSeg/contrib/RemoteSensing/`目录下执行。 - -- Paddle 1.7.1+ -由于图像分割模型计算开销大,推荐在GPU版本的PaddlePaddle下使用。 -PaddlePaddle的安装, 请按照[官网指引](https://paddlepaddle.org.cn/install/quick)安装合适自己的版本。 - -- Python 3.5+ - -- 其他依赖安装 - -通过以下命令安装python包依赖,请确保至少执行过一次以下命令: -``` -pip install -r requirements.txt -``` -另外需要安装gdal. **Note:** 使用pip安装gdal可能出错,推荐使用conda进行安装: -``` -conda install gdal -``` - -## 目录结构说明 - ``` -RemoteSensing # 根目录 - |-- dataset # 数据集 - |-- docs # 文档 - |-- models # 模型类定义模块 - |-- nets # 组网模块 - |-- readers # 数据读取模块 - |-- tools # 工具集 - |-- transforms # 数据增强模块 - |-- utils # 公用模块 - |-- train_demo.py # 训练demo脚本 - |-- predict_demo.py # 预测demo脚本 - |-- visualize_demo.py # 可视化demo脚本 - |-- README.md # 使用手册 - - ``` - -## 使用教程 - -基于L8 SPARCS数据集进行云雪分割,提供数据准备、数据分析、训练、预测、可视化的全流程展示。 - -### 1. 数据准备 -#### L8 SPARCS数据集 -[L8 SPARCS](https://www.usgs.gov/land-resources/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs-validation)数据集包含80张 Landsat 8 卫星影像,涵盖10个波段。 -原始标注图片包含7个类别,分别是 “cloud”, “cloud shadow”, “shadow over water”, “snow/ice”, ”water”, “land”和”flooded”。 - -

-![L8 SPARCS数据集示例](docs/imgs/dataset.png)

- -由于“flooded”和“shadow over water”2个类别占比仅为1.8%和0.24%,我们将其进行合并, -“flooded”归为“land”,“shadow over water”归为“shadow”,合并后标注包含5个类别。 - -数值、类别、颜色对应表: - -|Pixel value|Class|Color| -|---|---|---| -|0|cloud|white| -|1|shadow|black| -|2|snow/ice|cyan| -|3|water|blue| -|4|land|grey| - -执行以下命令下载并解压经过处理之后的数据集`remote_sensing_seg`: -```shell script -mkdir dataset && cd dataset -wget https://paddleseg.bj.bcebos.com/dataset/remote_sensing_seg.zip -unzip remote_sensing_seg.zip -cd .. -``` -其中`data`目录存放遥感影像,`data_vis`目录存放彩色合成预览图,`mask`目录存放标注图。 - -#### 数据协议 -对于您自己的数据集,需要按照我们的[数据协议](docs/data_prepare.md)进行数据准备。 - -### 2. 数据校验与分析 -为确保能正常训练,我们应该先对数据集进行校验。同时,遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的组成、优化模型训练效果,需要对数据进行分析。 -具体步骤参见[数据校验与分析](docs/data_analyse_and_check.md)章节。 - -### 3. 模型训练 -#### (1) 设置GPU卡号 -```shell script -export CUDA_VISIBLE_DEVICES=0 -``` -#### (2) 以U-Net为例,在RemoteSensing目录下运行`train_demo.py`即可开始训练。 -```shell script -python train_demo.py --data_dir dataset/remote_sensing_seg \ ---model_type unet \ ---save_dir saved_model/remote_sensing_unet \ ---num_classes 5 \ ---channel 10 \ ---lr 0.01 \ ---clip_min_value 7172 6561 5777 5103 4291 4000 4000 4232 6934 7199 \ ---clip_max_value 50000 50000 50000 50000 50000 40000 30000 18000 40000 36000 \ ---mean 0.14311188522260637 0.14288498042151332 0.14812997807748615 0.16377211813814938 0.2737538363784552 0.2740934379398823 0.27749601919204 0.07767443032935262 0.5694699410349131 0.5549716085195542 \ ---std 0.09101632762467489 0.09600705942721106 0.096193618606776 0.10371446736389771 0.10911951586604118 0.11043593115173281 0.12648042598739268 0.027746262217260665 0.06822348076384514 0.062377591186668725 \ ---num_epochs 500 \ ---train_batch_size 3 -``` - -训练过程将自动开启边训边评估策略,并使用VisualDL保存训练日志,显示如下: -![](docs/imgs/visualdl.png) -`mIoU`最高的模型将自动保存在`saved_model/remote_sensing_unet/best_model`目录下,最高mIoU=0.7782 - -### 4. 模型预测 -#### (1) 设置GPU卡号 -```shell script -export CUDA_VISIBLE_DEVICES=0 -``` -#### (2) 以刚训练好的U-Net最优模型为例,在RemoteSensing目录下运行`predict_demo.py`即可开始预测。 -```shell script -python predict_demo.py --data_dir dataset/remote_sensing_seg/ \ ---file_list val.txt \ ---load_model_dir saved_model/remote_sensing_unet/best_model \ ---save_img_dir saved_model/remote_sensing_unet/best_model/predict \ ---color_map 255 255 255 0 0 0 0 255 255 0 0 255 150 150 150 -``` - -### 5. 可视化 -我们提供可视化API对预测效果进行直观的展示和对比。每张可视化图片包括彩色合成预览图、标注图、预测结果,使得效果好坏一目了然。 -```shell script -python visualize_demo.py --data_dir dataset/remote_sensing_seg/ \ ---file_list val.txt \ ---pred_dir saved_model/remote_sensing_unet/best_model/predict \ ---save_dir saved_model/remote_sensing_unet/best_model/vis_results -```` -3张可视化图片示例: - -![](docs/imgs/vis.png) - -## API说明 - -您可以使用`RemoteSensing`目录下提供的API构建自己的分割代码。 - -- [数据处理-transforms](docs/transforms.md) diff --git a/legacy/contrib/RemoteSensing/__init__.py b/legacy/contrib/RemoteSensing/__init__.py deleted file mode 100644 index fc8620cab5..0000000000 --- a/legacy/contrib/RemoteSensing/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import utils -import nets -import models -import transforms -import readers -from utils.utils import get_environ_info - -env_info = get_environ_info() diff --git a/legacy/contrib/RemoteSensing/docs/data_analyse_and_check.md b/legacy/contrib/RemoteSensing/docs/data_analyse_and_check.md deleted file mode 100644 index c965639fe1..0000000000 --- a/legacy/contrib/RemoteSensing/docs/data_analyse_and_check.md +++ /dev/null @@ -1,133 +0,0 @@ -# 数据校验和分析 -为确保能正常训练,我们应该先对数据集进行校验。同时,遥感影像往往由许多波段组成,不同波段数据分布可能大相径庭,例如可见光波段和热红外波段分布十分不同。为了更深入了解数据的组成、优化模型训练效果,需要对数据进行分析。 -接下来以`remote_sensing_seg`数据集为例展示数据校验和分析的全过程。 - -## Step1 数据校验和初步分析 - -我们提供数据校验和分析的脚本,帮助您排查基本的数据问题,为如何配置训练参数提供指导。使用方式如下所示: -```shell script -python tools/data_analyse_and_check.py --data_dir 'dataset/remote_sensing_seg/' --num_classes 5 -``` -参数说明: -- --data_dir: 数据集所在目录 -- --num_classes: 数据的类别数 - -运行后,命令行将显示概览信息,详细的错误信息将以data_analyse_and_check.log文件保存到数据集所在目录。 - -### 数据校验 -数据校验内容如下: -#### 1 列表分割符校验(separator_check) -检查在`train.txt`,`val.txt`和`test.txt`列表文件中的分隔符设置是否正确。 - -#### 2 数据读取校验(imread_check) -检查是否能成功读取`train.txt`,`val.txt`,`test.txt`中所有图片。 - -若不正确返回错误信息。错误可能有多种情况,如数据集路径设置错误、图片损坏等。 - -#### 3 标注通道数校验(single_channel_label_check) -检查标注图的通道数。正确的标注图应该为单通道图像。 - -#### 4 标注类别校验(label_class_check) -检查实际标注类别是否和配置参数`num_classes`,`ignore_index`匹配。 - -**NOTE:** -标注图像类别数值必须在[0~(`num_classes`-1)]范围内或者为`ignore_index`。 -标注类别最好从0开始,否则可能影响精度。 - -#### 5 图像与标注图尺寸一致性校验(shape_check) -验证图像尺寸和对应标注图尺寸是否一致。 - - -### 数据分析 -数据统计分析内容如下: - -#### 1 标注类别统计(label_class_statistics) -统计每种类别的像素总数和所占比例。统计结果示例如下: -``` -Label class statistics: -(label class, percentage, total pixel number) = [(0, 0.1372, 2194601), (1, 0.0827, 1322459), (2, 0.0179, 286548), (3, 0.1067, 1706810), (4, 0.6556, 10489582)] -``` - -#### 2 图像尺寸范围统计(img_shape_range_statistics) -统计数据集中图片的最大和最小的宽高。 - -#### 3 图像通道数统计(img_channels_statistics) -统计数据集中图片的通道个数。 - -#### 4 数据范围统计(data_range_statistics) -逐通道地统计数据集的数值范围。 - -#### 5 数据分布统计(data_distribution_statistics) -逐通道地统计数据集分布。并将分布保存为`pkl`文件,方便后续可视化和数据裁剪。 - -#### 6 归一化系数计算(cal_normalize_coefficient) -逐通道地计算归一化系数mean、standard deviation. 
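Item 6 above only names the computation; as a rough reference, the sketch below shows one way to derive clipped per-channel mean/std with NumPy. It is illustrative only: the function name `channel_mean_std` and the choice to rescale each channel to [0, 1] by its clipping bounds are assumptions made here, and the `tools/cal_norm_coef.py` script used later in Step 3 may differ in detail.

```python
import numpy as np

def channel_mean_std(images, clip_min, clip_max):
    """Per-channel mean/std over (H, W, C) images, after clipping each channel."""
    clip_min = np.asarray(clip_min, dtype=np.float64)
    clip_max = np.asarray(clip_max, dtype=np.float64)
    total, total_sq, count = 0.0, 0.0, 0
    for im in images:
        im = np.clip(im.astype(np.float64), clip_min, clip_max)
        # Assumed here: rescale each channel to [0, 1] by its clipping bounds,
        # matching the value range the Normalize transform works in.
        im = (im - clip_min) / (clip_max - clip_min)
        total = total + im.sum(axis=(0, 1))
        total_sq = total_sq + (im ** 2).sum(axis=(0, 1))
        count += im.shape[0] * im.shape[1]
    mean = total / count
    std = np.sqrt(total_sq / count - mean ** 2)
    return mean, std
```

For the `remote_sensing_seg` example, `images` would iterate over the training tiles and `clip_min`/`clip_max` would be the ten-channel bounds chosen in Step 2 below.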
- -**备注:** 数据分析步骤1\~3在训练集、验证集、测试集上分别进行,步骤4\~6在整个数据集上进行。 - -## Step2 数据分布可视化,确定数据裁剪范围 -### 数据分布可视化 -我们提供可视化数据分布脚本,对数据集的数据分布按通道进行可视化。 -可视化需要先安装matplotlib: -```shell script -pip install matplotlib -``` -使用方式如下: -```shell script -python tools/data_distribution_vis.py --pkl_path 'dataset/remote_sensing_seg/img_pixel_statistics.pkl' -``` -参数说明: -- --pkl_path: 数据分布文件保存路径 - -其中部分通道的可视化效果如下: -![](./imgs/data_distribution.png) -需要注意的是,为便于观察,纵坐标为对数坐标。 - -### 确定数据裁剪范围 -遥感影像数据分布范围广,其中往往存在一些异常值,影响算法对实际数据分布的拟合效果。为更好地对数据进行归一化,需要抑制遥感影像中少量的异常值。 -我们可以根据上述的数据分布统计结果来确定数据裁剪范围,并在后续图像预处理过程中对超出范围的像素值通过截断进行校正,从而去除异常值带来的干扰。 - -例如对于上述数据分布进行逐通道数据裁剪,我们选取的截断范围是: -``` -裁剪范围最小值: clip_min_value = [7172, 6561, 5777, 5103, 4291, 4000, 4000, 4232, 6934, 7199] -裁剪范围最大值: clip_max_value = [50000, 50000, 50000, 50000, 50000, 40000, 30000, 18000, 40000, 36000] -``` - -## Step3 统计裁剪比例、归一化系数 -为避免数据裁剪范围选取不当带来的影响,应该统计异常值像素占比,确保受影响的像素比例不要过高。 -接着对裁剪后的数据计算归一化系数mean和standard deviation,用于图像预处理中的归一化参数设置。 - -使用方式如下: -```shell script -python tools/cal_norm_coef.py --data_dir 'dataset/remote_sensing_seg/' \ ---pkl_path 'dataset/remote_sensing_seg/img_pixel_statistics.pkl' \ ---clip_min_value 7172 6561 5777 5103 4291 4000 4000 4232 6934 7199 \ ---clip_max_value 50000 50000 50000 50000 50000 40000 30000 18000 40000 36000 -``` -参数说明: -- --data_dir: 数据集路径 -- --pkl_path: 数据分布文件保存路径 -- --clip_min_value: 数据裁剪范围最小值 -- --clip_max_value: 数据裁剪范围最大值 - -裁剪像素占比统计结果如下: -``` -channel 0, the percentage of pixels to be clipped = 0.0005625999999999687 -channel 1, the percentage of pixels to be clipped = 0.0011332250000000155 -channel 2, the percentage of pixels to be clipped = 0.0008772375000000165 -channel 3, the percentage of pixels to be clipped = 0.0013191750000000058 -channel 4, the percentage of pixels to be clipped = 0.0012433250000000173 -channel 5, the percentage of pixels to be clipped = 7.49875000000122e-05 -channel 6, the percentage of pixels to be clipped = 0.0006973750000000001 -channel 7, the percentage of pixels to be clipped = 4.950000000003563e-06 -channel 8, the percentage of pixels to be clipped = 0.00014873749999999575 -channel 9, the percentage of pixels to be clipped = 0.00011173750000004201 -``` -可看出,被裁剪像素占比均不超过0.2% - -裁剪后数据的归一化系数如下: -``` -Count the channel-by-channel mean and std of the image: -mean = [0.14311189 0.14288498 0.14812998 0.16377212 0.27375384 0.27409344 0.27749602 0.07767443 0.56946994 0.55497161] -std = [0.09101633 0.09600706 0.09619362 0.10371447 0.10911952 0.11043593 0.12648043 0.02774626 0.06822348 0.06237759] -``` diff --git a/legacy/contrib/RemoteSensing/docs/data_prepare.md b/legacy/contrib/RemoteSensing/docs/data_prepare.md deleted file mode 100644 index a81636a713..0000000000 --- a/legacy/contrib/RemoteSensing/docs/data_prepare.md +++ /dev/null @@ -1,144 +0,0 @@ -# 数据准备 - - -## 数据协议 -数据集包含原图、标注图及相应的文件列表文件。 - -### 数据格式 -遥感影像的格式多种多样,不同传感器产生的数据格式也可能不同。 -PaddleSeg已兼容以下4种格式图片读取: -- `tif` -- `png` -- `img` -- `npy` - -### 原图要求 -原图数据的尺寸应为(h, w, channel),其中h, w为图像的高和宽,channel为图像的通道数。 - -### 标注图要求 -标注图像必须为单通道图像,像素值即为对应的类别,像素标注类别需要从0开始递增。 -例如0,1,2,3表示有4种类别,标注类别最多为256类。其中可以指定特定的像素值用于表示该值的像素不参与训练和评估(默认为255)。 - -### 文件列表文件 -文件列表文件包括`train.txt`,`val.txt`,`test.txt`和`labels.txt`. -`train.txt`,`val.txt`和`test.txt`文本以空格为分割符分为两列,第一列为图像文件相对于dataset的相对路径,第二列为标注图像文件相对于dataset的相对路径。如下所示: -``` -images/xxx1.tif annotations/xxx1.png -images/xxx2.tif annotations/xxx2.png -... -``` -`labels.txt`: 每一行为一个单独的类别,相应的行号即为类别对应的id(行号从0开始),如下所示: -``` -labelA -labelB -... 
-``` - -## 数据集切分和文件列表生成 -数据集切分有2种方式:随机切分和手动切分。对于这2种方式,PaddleSeg均提供了生成文件列表的脚本,您可以按需要选择。 - -### 1 对数据集按比例随机切分,并生成文件列表 -数据文件结构如下: -``` -./dataset/ # 数据集根目录 -|--images # 原图目录 -| |--xxx1.tif -| |--... -| └--... -| -|--annotations # 标注图目录 -| |--xxx1.png -| |--... -| └--... -``` -其中,相应的文件名可根据需要自行定义。 - -使用命令如下,支持通过不同的Flags来开启特定功能。 -``` -python tools/split_dataset_list.py ${FLAGS} -``` -参数说明: -- dataset_root: 数据集根目录 -- images_dir_name: 原图目录名 -- labels_dir_name: 标注图目录名 - -FLAGS说明: - -|FLAG|含义|默认值|参数数目| -|-|-|-|-| -|--split|数据集切分比例|0.7 0.3 0|3| -|--separator|文件列表分隔符|" "|1| -|--format|图片和标签集的数据格式|"tif" "png"|2| -|--label_class|标注类别|'\_\_background\_\_' '\_\_foreground\_\_'|若干| -|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| - -运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`. - -**Note:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,将生成不含分隔符和标注图片路径的文件列表。 - -#### 使用示例 -``` -python tools/split_dataset_list.py images annotations --split 0.6 0.2 0.2 --format tif png -``` - -### 2 已经手工划分好数据集,按照目录结构生成文件列表 -数据目录手工划分成如下结构: -``` -./dataset/ # 数据集根目录 -├── annotations # 标注目录 -│   ├── test -│   │   ├── ... -│   │   └── ... -│   ├── train -│   │   ├── ... -│   │   └── ... -│   └── val -│   ├── ... -│   └── ... -└── images # 原图目录 - ├── test - │   ├── ... - │   └── ... - ├── train - │   ├── ... - │   └── ... - └── val - ├── ... - └── ... -``` -其中,相应的文件名可根据需要自行定义。 - -使用命令如下,支持通过不同的Flags来开启特定功能。 -``` -python tools/create_dataset_list.py ${FLAGS} -``` -参数说明: -- dataset_root: 数据集根目录 - -FLAGS说明: - -|FLAG|含义|默认值|参数数目| -|-|-|-|-| -|--separator|文件列表分隔符|" "|1| -|--folder|图片和标签集的文件夹名|"images" "annotations"|2| -|--second_folder|训练/验证/测试集的文件夹名|"train" "val" "test"|若干| -|--format|图片和标签集的数据格式|"tif" "png"|2| -|--label_class|标注类别|'\_\_background\_\_' '\_\_foreground\_\_'|若干| -|--postfix|按文件主名(无扩展名)是否包含指定后缀对图片和标签集进行筛选|"" ""(2个空字符)|2| - -运行后将在数据集根目录下生成`train.txt`,`val.txt`,`test.txt`和`labels.txt`. 
- -**Note:** 生成文件列表要求:要么原图和标注图片数量一致,要么只有原图,没有标注图片。若数据集缺少标注图片,将生成不含分隔符和标注图片路径的文件列表。 - -#### 使用示例 -若您已经按上述说明整理好了数据集目录结构,可以运行下面的命令生成文件列表。 - -``` -# 生成文件列表,其分隔符为空格,图片和标签集的数据格式都为png -python tools/create_dataset_list.py --separator " " --format png png -``` -``` -# 生成文件列表,其图片和标签集的文件夹名为img和gt,训练和验证集的文件夹名为training和validation,不生成测试集列表 -python tools/create_dataset_list.py \ - --folder img gt --second_folder training validation -``` diff --git a/legacy/contrib/RemoteSensing/docs/imgs/data_distribution.png b/legacy/contrib/RemoteSensing/docs/imgs/data_distribution.png deleted file mode 100644 index 6e1afea9ed..0000000000 Binary files a/legacy/contrib/RemoteSensing/docs/imgs/data_distribution.png and /dev/null differ diff --git a/legacy/contrib/RemoteSensing/docs/imgs/dataset.png b/legacy/contrib/RemoteSensing/docs/imgs/dataset.png deleted file mode 100644 index 1eafd8b95c..0000000000 Binary files a/legacy/contrib/RemoteSensing/docs/imgs/dataset.png and /dev/null differ diff --git a/legacy/contrib/RemoteSensing/docs/imgs/vis.png b/legacy/contrib/RemoteSensing/docs/imgs/vis.png deleted file mode 100644 index 370b2e61cf..0000000000 Binary files a/legacy/contrib/RemoteSensing/docs/imgs/vis.png and /dev/null differ diff --git a/legacy/contrib/RemoteSensing/docs/imgs/visualdl.png b/legacy/contrib/RemoteSensing/docs/imgs/visualdl.png deleted file mode 100644 index d7aa3e5828..0000000000 Binary files a/legacy/contrib/RemoteSensing/docs/imgs/visualdl.png and /dev/null differ diff --git a/legacy/contrib/RemoteSensing/docs/transforms.md b/legacy/contrib/RemoteSensing/docs/transforms.md deleted file mode 100644 index a35e6cd1bd..0000000000 --- a/legacy/contrib/RemoteSensing/docs/transforms.md +++ /dev/null @@ -1,145 +0,0 @@ -# transforms.transforms - -对用于分割任务的数据进行操作。可以利用[Compose](#compose)类将图像预处理/增强操作进行组合。 - - -## Compose类 -```python -transforms.transforms.Compose(transforms) -``` -根据数据预处理/数据增强列表对输入数据进行操作。 -### 参数 -* **transforms** (list): 数据预处理/数据增强列表。 - - -## RandomHorizontalFlip类 -```python -transforms.transforms.RandomHorizontalFlip(prob=0.5) -``` -以一定的概率对图像进行水平翻转,模型训练时的数据增强操作。 -### 参数 -* **prob** (float): 随机水平翻转的概率。默认值为0.5。 - - -## RandomVerticalFlip类 -```python -transforms.transforms.RandomVerticalFlip(prob=0.1) -``` -以一定的概率对图像进行垂直翻转,模型训练时的数据增强操作。 -### 参数 -* **prob** (float): 随机垂直翻转的概率。默认值为0.1。 - - -## Resize类 -```python -transforms.transforms.Resize(target_size, interp='LINEAR') -``` -调整图像大小(resize)。 - -- 当目标大小(target_size)类型为int时,根据插值方式, - 将图像resize为[target_size, target_size]。 -- 当目标大小(target_size)类型为list或tuple时,根据插值方式, - 将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。 -### 参数 -* **target_size** (int|list|tuple): 目标大小 -* **interp** (str): resize的插值方式,与opencv的插值方式对应, -可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。 - - -## ResizeByLong类 -```python -transforms.transforms.ResizeByLong(long_size) -``` -对图像长边resize到固定值,短边按比例进行缩放。 -### 参数 -* **long_size** (int): resize后图像的长边大小。 - - -## ResizeRangeScaling类 -```python -transforms.transforms.ResizeRangeScaling(min_value=400, max_value=600) -``` -对图像长边随机resize到指定范围内,短边按比例进行缩放,模型训练时的数据增强操作。 -### 参数 -* **min_value** (int): 图像长边resize后的最小值。默认值400。 -* **max_value** (int): 图像长边resize后的最大值。默认值600。 - - -## ResizeStepScaling类 -```python -transforms.transforms.ResizeStepScaling(min_scale_factor=0.75, max_scale_factor=1.25, scale_step_size=0.25) -``` -对图像按照某一个比例resize,这个比例以scale_step_size为步长,在[min_scale_factor, max_scale_factor]随机变动,模型训练时的数据增强操作。 -### 参数 -* **min_scale_factor**(float), resize最小尺度。默认值0.75。 -* **max_scale_factor** (float), 
resize最大尺度。默认值1.25。 -* **scale_step_size** (float), resize尺度范围间隔。默认值0.25。 - - -## Clip类 -```python -transforms.transforms.Clip(min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0]) -``` -对图像上超出一定范围的数据进行裁剪。 - -### 参数 -* **min_var** (list): 裁剪的下限,小于min_val的数值均设为min_val. 默认值[0, 0, 0]. -* **max_var** (list): 裁剪的上限,大于max_val的数值均设为max_val. 默认值[255.0, 255.0, 255.0] - - -## Normalize类 -```python -transforms.transforms.Normalize(min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0], mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) -``` -对图像进行标准化。 - -1.图像像素归一化到区间 [0.0, 1.0]。 -2.对图像进行减均值除以标准差操作。 -### 参数 -* **min_val** (list): 图像数据集的最小值。默认值[0, 0, 0]. -* **max_val** (list): 图像数据集的最大值。默认值[255.0, 255.0, 255.0] -* **mean** (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]。 -* **std** (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]。 - - -## Padding类 -```python -transforms.transforms.Padding(target_size, im_padding_value=127.5, label_padding_value=255) -``` -对图像或标注图像进行padding,padding方向为右和下。根据提供的值对图像或标注图像进行padding操作。 -### 参数 -* **target_size** (int|list|tuple): padding后图像的大小。 -* **im_padding_value** (list): 图像padding的值。默认为127.5 -* **label_padding_value** (int): 标注图像padding的值。默认值为255(仅在训练时需要设定该参数)。 - - -## RandomPaddingCrop类 -```python -transforms.transforms.RandomPaddingCrop(crop_size=512, im_padding_value=127.5, label_padding_value=255) -``` -对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作,模型训练时的数据增强操作。 -### 参数 -* **crop_size**(int|list|tuple): 裁剪图像大小。默认为512。 -* **im_padding_value** (list): 图像padding的值。默认为127.5。 -* **label_padding_value** (int): 标注图像padding的值。默认值为255。 - - -## RandomBlur类 -```python -transforms.transforms.RandomBlur(prob=0.1) -``` -以一定的概率对图像进行高斯模糊,模型训练时的数据增强操作。 -### 参数 -* **prob** (float): 图像模糊概率。默认为0.1。 - - -## RandomScaleAspect类 -```python -transforms.transforms.RandomScaleAspect(min_scale=0.5, aspect_ratio=0.33) -``` -裁剪并resize回原始尺寸的图像和标注图像,模型训练时的数据增强操作。 - -按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 -### 参数 -* **min_scale** (float):裁取图像占原始图像的面积比,取值[0,1],为0时则返回原图。默认为0.5。 -* **aspect_ratio** (float): 裁取图像的宽高比范围,非负值,为0时返回原图。默认为0.33。 diff --git a/legacy/contrib/RemoteSensing/models/__init__.py b/legacy/contrib/RemoteSensing/models/__init__.py deleted file mode 100644 index aae31421b3..0000000000 --- a/legacy/contrib/RemoteSensing/models/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .load_model import * -from .unet import * -from .hrnet import * diff --git a/legacy/contrib/RemoteSensing/models/base.py b/legacy/contrib/RemoteSensing/models/base.py deleted file mode 100644 index 0a0b73ce26..0000000000 --- a/legacy/contrib/RemoteSensing/models/base.py +++ /dev/null @@ -1,621 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -import paddle.fluid as fluid -import os -import numpy as np -import time -import math -import yaml -import tqdm -import cv2 -import copy -import utils.logging as logging -from collections import OrderedDict -from os import path as osp -from utils.utils import seconds_to_hms, get_environ_info -from utils.metrics import ConfusionMatrix -import transforms.transforms as T -import utils -import paddle - - -def save_infer_program(test_program, ckpt_dir): - _test_program = test_program.clone() - _test_program.desc.flush() - _test_program.desc._set_version() - utils.paddle_utils.save_op_version_info(_test_program.desc) - with open(os.path.join(ckpt_dir, 'model') + ".pdmodel", "wb") as f: - f.write(_test_program.desc.serialize_to_string()) - - -def dict2str(dict_input): - out = '' - for k, v in dict_input.items(): - try: - v = round(float(v), 6) - except: - pass - out = out + '{}={}, '.format(k, v) - return out.strip(', ') - - -class BaseModel(object): - def __init__(self, - num_classes=2, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - self.init_params = locals() - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. - format(type(class_weight))) - - self.num_classes = num_classes - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.sync_bn = sync_bn - - self.labels = None - self.env_info = get_environ_info() - if self.env_info['place'] == 'cpu': - self.places = fluid.cpu_places() - else: - self.places = fluid.cuda_places() - self.exe = fluid.Executor(self.places[0]) - self.train_prog = None - self.test_prog = None - self.parallel_train_prog = None - self.train_inputs = None - self.test_inputs = None - self.train_outputs = None - self.test_outputs = None - self.train_data_loader = None - self.eval_metrics = None - # 当前模型状态 - self.status = 'Normal' - - def _get_single_card_bs(self, batch_size): - if batch_size % len(self.places) == 0: - return int(batch_size // len(self.places)) - else: - raise Exception("Please support correct batch_size, \ - which can be divided by available cards({}) in {}". 
- format(self.env_info['num'], - self.env_info['place'])) - - def build_net(self, mode='train'): - """应根据不同的情况进行构建""" - pass - - def build_program(self): - # build training network - self.train_inputs, self.train_outputs = self.build_net(mode='train') - self.train_prog = fluid.default_main_program() - startup_prog = fluid.default_startup_program() - - # build prediction network - self.test_prog = fluid.Program() - with fluid.program_guard(self.test_prog, startup_prog): - with fluid.unique_name.guard(): - self.test_inputs, self.test_outputs = self.build_net( - mode='test') - self.test_prog = self.test_prog.clone(for_test=True) - - def arrange_transform(self, transforms, mode='train'): - arrange_transform = T.ArrangeSegmenter - if type(transforms.transforms[-1]).__name__.startswith('Arrange'): - transforms.transforms[-1] = arrange_transform(mode=mode) - else: - transforms.transforms.append(arrange_transform(mode=mode)) - - def build_train_data_loader(self, dataset, batch_size): - # init data_loader - if self.train_data_loader is None: - self.train_data_loader = fluid.io.DataLoader.from_generator( - feed_list=list(self.train_inputs.values()), - capacity=64, - use_double_buffer=True, - iterable=True) - batch_size_each_gpu = self._get_single_card_bs(batch_size) - self.train_data_loader.set_sample_list_generator( - dataset.generator(batch_size=batch_size_each_gpu), - places=self.places) - - def net_initialize(self, - startup_prog=None, - pretrain_weights=None, - resume_weights=None): - if startup_prog is None: - startup_prog = fluid.default_startup_program() - self.exe.run(startup_prog) - if resume_weights is not None: - logging.info("Resume weights from {}".format(resume_weights)) - if not osp.exists(resume_weights): - raise Exception("Path {} not exists.".format(resume_weights)) - fluid.load(self.train_prog, osp.join(resume_weights, 'model'), - self.exe) - # Check is path ended by path spearator - if resume_weights[-1] == os.sep: - resume_weights = resume_weights[0:-1] - epoch_name = osp.basename(resume_weights) - # If resume weights is end of digit, restore epoch status - epoch = epoch_name.split('_')[-1] - if epoch.isdigit(): - self.begin_epoch = int(epoch) - else: - raise ValueError("Resume model path is not valid!") - logging.info("Model checkpoint loaded successfully!") - - elif pretrain_weights is not None: - logging.info( - "Load pretrain weights from {}.".format(pretrain_weights)) - utils.load_pretrained_weights(self.exe, self.train_prog, - pretrain_weights) - - def get_model_info(self): - # 存储相应的信息到yml文件 - info = dict() - info['Model'] = self.__class__.__name__ - if 'self' in self.init_params: - del self.init_params['self'] - if '__class__' in self.init_params: - del self.init_params['__class__'] - info['_init_params'] = self.init_params - - info['_Attributes'] = dict() - info['_Attributes']['num_classes'] = self.num_classes - info['_Attributes']['labels'] = self.labels - try: - info['_Attributes']['eval_metric'] = dict() - for k, v in self.eval_metrics.items(): - if isinstance(v, np.ndarray): - if v.size > 1: - v = [float(i) for i in v] - else: - v = float(v) - info['_Attributes']['eval_metric'][k] = v - except: - pass - - if hasattr(self, 'test_transforms'): - if self.test_transforms is not None: - info['test_transforms'] = list() - for op in self.test_transforms.transforms: - name = op.__class__.__name__ - attr = op.__dict__ - info['test_transforms'].append({name: attr}) - - if hasattr(self, 'train_transforms'): - if self.train_transforms is not None: - info['train_transforms'] = 
list() - for op in self.train_transforms.transforms: - name = op.__class__.__name__ - attr = op.__dict__ - info['train_transforms'].append({name: attr}) - - if hasattr(self, 'train_init'): - if 'self' in self.train_init: - del self.train_init['self'] - if 'train_reader' in self.train_init: - del self.train_init['train_reader'] - if 'eval_reader' in self.train_init: - del self.train_init['eval_reader'] - if 'optimizer' in self.train_init: - del self.train_init['optimizer'] - info['train_init'] = self.train_init - return info - - def save_model(self, save_dir): - if not osp.isdir(save_dir): - if osp.exists(save_dir): - os.remove(save_dir) - os.makedirs(save_dir) - model_info = self.get_model_info() - - if self.status == 'Normal': - fluid.save(self.train_prog, osp.join(save_dir, 'model')) - save_infer_program(self.test_prog, save_dir) - - model_info['status'] = self.status - with open( - osp.join(save_dir, 'model.yml'), encoding='utf-8', - mode='w') as f: - yaml.dump(model_info, f) - - # The flag of model for saving successfully - open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model saved in {}.".format(save_dir)) - - def export_inference_model(self, save_dir): - test_input_names = [var.name for var in list(self.test_inputs.values())] - test_outputs = list(self.test_outputs.values()) - fluid.io.save_inference_model( - dirname=save_dir, - executor=self.exe, - params_filename='__params__', - feeded_var_names=test_input_names, - target_vars=test_outputs, - main_program=self.test_prog) - model_info = self.get_model_info() - model_info['status'] = 'Infer' - - # Save input and output descrition of model - model_info['_ModelInputsOutputs'] = dict() - model_info['_ModelInputsOutputs']['test_inputs'] = [ - [k, v.name] for k, v in self.test_inputs.items() - ] - model_info['_ModelInputsOutputs']['test_outputs'] = [ - [k, v.name] for k, v in self.test_outputs.items() - ] - - with open( - osp.join(save_dir, 'model.yml'), encoding='utf-8', - mode='w') as f: - yaml.dump(model_info, f) - - # The flag of model for saving successfully - open(osp.join(save_dir, '.success'), 'w').close() - logging.info("Model for inference deploy saved in {}.".format(save_dir)) - - def default_optimizer(self, - learning_rate, - num_epochs, - num_steps_each_epoch, - lr_decay_power=0.9, - regularization_coeff=4e-5): - decay_step = num_epochs * num_steps_each_epoch - lr_decay = fluid.layers.polynomial_decay( - learning_rate, - decay_step, - end_learning_rate=0, - power=lr_decay_power) - optimizer = fluid.optimizer.Momentum( - lr_decay, - momentum=0.9, - regularization=fluid.regularizer.L2Decay( - regularization_coeff=regularization_coeff)) - return optimizer - - def train(self, - num_epochs, - train_reader, - train_batch_size=2, - eval_reader=None, - eval_best_metric=None, - save_interval_epochs=1, - log_interval_steps=2, - save_dir='output', - pretrain_weights=None, - resume_weights=None, - optimizer=None, - learning_rate=0.01, - lr_decay_power=0.9, - regularization_coeff=4e-5, - use_vdl=False): - self.labels = train_reader.labels - self.train_transforms = train_reader.transforms - self.train_init = locals() - self.begin_epoch = 0 - - if optimizer is None: - num_steps_each_epoch = train_reader.num_samples // train_batch_size - optimizer = self.default_optimizer( - learning_rate=learning_rate, - num_epochs=num_epochs, - num_steps_each_epoch=num_steps_each_epoch, - lr_decay_power=lr_decay_power, - regularization_coeff=regularization_coeff) - self.optimizer = optimizer - self.build_program() - self.net_initialize( 
- startup_prog=fluid.default_startup_program(), - pretrain_weights=pretrain_weights, - resume_weights=resume_weights) - - if self.begin_epoch >= num_epochs: - raise ValueError( - ("begin epoch[{}] is larger than num_epochs[{}]").format( - self.begin_epoch, num_epochs)) - - if not osp.isdir(save_dir): - if osp.exists(save_dir): - os.remove(save_dir) - os.makedirs(save_dir) - - # add arrange op tor transforms - self.arrange_transform(transforms=train_reader.transforms, mode='train') - self.build_train_data_loader( - dataset=train_reader, batch_size=train_batch_size) - - if eval_reader is not None: - self.eval_transforms = eval_reader.transforms - self.test_transforms = copy.deepcopy(eval_reader.transforms) - - lr = self.optimizer._learning_rate - lr.persistable = True - if isinstance(lr, fluid.framework.Variable): - self.train_outputs['lr'] = lr - - # 多卡训练 - if self.parallel_train_prog is None: - build_strategy = fluid.compiler.BuildStrategy() - if self.env_info['place'] != 'cpu' and len(self.places) > 1: - build_strategy.sync_batch_norm = self.sync_bn - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_iteration_per_drop_scope = 1 - - self.parallel_train_prog = fluid.CompiledProgram( - self.train_prog).with_data_parallel( - loss_name=self.train_outputs['loss'].name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - - total_num_steps = math.floor( - train_reader.num_samples / train_batch_size) - num_steps = 0 - time_stat = list() - time_train_one_epoch = None - time_eval_one_epoch = None - - total_num_steps_eval = 0 - # eval times - total_eval_times = math.ceil(num_epochs / save_interval_epochs) - eval_batch_size = train_batch_size - if eval_reader is not None: - total_num_steps_eval = math.ceil( - eval_reader.num_samples / eval_batch_size) - - if use_vdl: - from visualdl import LogWriter - vdl_logdir = osp.join(save_dir, 'vdl_log') - log_writer = LogWriter(vdl_logdir) - best_metric = -1.0 - best_model_epoch = 1 - for i in range(self.begin_epoch, num_epochs): - records = list() - step_start_time = time.time() - epoch_start_time = time.time() - for step, data in enumerate(self.train_data_loader()): - outputs = self.exe.run( - self.parallel_train_prog, - feed=data, - fetch_list=list(self.train_outputs.values())) - outputs_avg = np.mean(np.array(outputs), axis=1) - records.append(outputs_avg) - - # time estimated to complete the training - currend_time = time.time() - step_cost_time = currend_time - step_start_time - step_start_time = currend_time - if len(time_stat) < 20: - time_stat.append(step_cost_time) - else: - time_stat[num_steps % 20] = step_cost_time - - num_steps += 1 - if num_steps % log_interval_steps == 0: - step_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), outputs_avg)) - - if use_vdl: - for k, v in step_metrics.items(): - log_writer.add_scalar( - step=num_steps, - tag='train/{}'.format(k), - value=v) - - # 计算剩余时间 - avg_step_time = np.mean(time_stat) - if time_train_one_epoch is not None: - eta = (num_epochs - i - 1) * time_train_one_epoch + ( - total_num_steps - step - 1) * avg_step_time - else: - eta = ((num_epochs - i) * total_num_steps - step - - 1) * avg_step_time - if time_eval_one_epoch is not None: - eval_eta = (total_eval_times - i // save_interval_epochs - ) * time_eval_one_epoch - else: - eval_eta = (total_eval_times - i // save_interval_epochs - ) * total_num_steps_eval * avg_step_time - eta_str = seconds_to_hms(eta + eval_eta) - - logging.info( - "[TRAIN] Epoch={}/{}, Step={}/{}, {}, time_each_step={}s, eta={}" - .format(i 
+ 1, num_epochs, step + 1, total_num_steps, - dict2str(step_metrics), round(avg_step_time, 2), - eta_str)) - - train_metrics = OrderedDict( - zip(list(self.train_outputs.keys()), np.mean(records, axis=0))) - logging.info('[TRAIN] Epoch {} finished, {} .'.format( - i + 1, dict2str(train_metrics))) - time_train_one_epoch = time.time() - epoch_start_time - - eval_epoch_start_time = time.time() - if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1: - current_save_dir = osp.join(save_dir, "epoch_{}".format(i + 1)) - if not osp.isdir(current_save_dir): - os.makedirs(current_save_dir) - if eval_reader is not None: - self.eval_metrics = self.evaluate( - eval_reader=eval_reader, - batch_size=eval_batch_size, - epoch_id=i + 1) - # 保存最优模型 - current_metric = self.eval_metrics[eval_best_metric] - if current_metric > best_metric: - best_metric = current_metric - best_model_epoch = i + 1 - best_model_dir = osp.join(save_dir, "best_model") - self.save_model(save_dir=best_model_dir) - if use_vdl: - for k, v in self.eval_metrics.items(): - if isinstance(v, list): - continue - if isinstance(v, np.ndarray): - if v.size > 1: - continue - log_writer.add_scalar( - step=num_steps, - tag='evaluate/{}'.format(k), - value=v) - self.save_model(save_dir=current_save_dir) - time_eval_one_epoch = time.time() - eval_epoch_start_time - if eval_reader is not None: - logging.info( - 'Current evaluated best model in validation dataset is epoch_{}, {}={}' - .format(best_model_epoch, eval_best_metric, - best_metric)) - - def evaluate(self, eval_reader, batch_size=1, epoch_id=None): - """评估。 - - Args: - eval_reader (reader): 评估数据读取器。 - batch_size (int): 评估时的batch大小。默认1。 - epoch_id (int): 当前评估模型所在的训练轮数。 - return_details (bool): 是否返回详细信息。默认False。 - - Returns: - dict: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 - 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 - tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), - 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 - """ - self.arrange_transform(transforms=eval_reader.transforms, mode='train') - total_steps = math.ceil(eval_reader.num_samples * 1.0 / batch_size) - conf_mat = ConfusionMatrix(self.num_classes, streaming=True) - data_generator = eval_reader.generator( - batch_size=batch_size, drop_last=False) - if not hasattr(self, 'parallel_test_prog'): - self.parallel_test_prog = fluid.CompiledProgram( - self.test_prog).with_data_parallel( - share_vars_from=self.parallel_train_prog) - logging.info( - "Start to evaluating(total_samples={}, total_steps={})...".format( - eval_reader.num_samples, total_steps)) - for step, data in tqdm.tqdm( - enumerate(data_generator()), total=total_steps): - images = np.array([d[0] for d in data]) - images = images.astype(np.float32) - labels = np.array([d[1] for d in data]) - num_samples = images.shape[0] - if num_samples < batch_size: - num_pad_samples = batch_size - num_samples - pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) - images = np.concatenate([images, pad_images]) - feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) - pred = outputs[0] - if num_samples < batch_size: - pred = pred[0:num_samples] - - mask = labels != self.ignore_index - conf_mat.calculate(pred=pred, label=labels, ignore=mask) - _, iou = conf_mat.mean_iou() - - logging.debug("[EVAL] Epoch={}, Step={}/{}, iou={}".format( - epoch_id, step + 1, total_steps, iou)) - - category_iou, miou = 
conf_mat.mean_iou() - category_acc, macc = conf_mat.accuracy() - precision, recall = conf_mat.precision_recall() - - metrics = OrderedDict( - zip([ - 'miou', 'category_iou', 'macc', 'category_acc', 'kappa', - 'precision', 'recall' - ], [ - miou, category_iou, macc, category_acc, - conf_mat.kappa(), precision, recall - ])) - - logging.info('[EVAL] Finished, Epoch={}, {} .'.format( - epoch_id, dict2str(metrics))) - return metrics - - def predict(self, im_file, transforms=None): - """预测。 - Args: - img_file(str|np.ndarray): 预测图像。 - transforms(transforms.transforms): 数据预处理操作。 - - Returns: - dict: 包含关键字'label_map'和'score_map', 'label_map'存储预测结果灰度图, - 像素值表示对应的类别,'score_map'存储各类别的概率,shape=(h, w, num_classes) - """ - if isinstance(im_file, str): - if not osp.exists(im_file): - raise ValueError( - 'The Image file does not exist: {}'.format(im_file)) - - if transforms is None and not hasattr(self, 'test_transforms'): - raise Exception("transforms need to be defined, now is None.") - if transforms is not None: - self.arrange_transform(transforms=transforms, mode='test') - im, im_info = transforms(im_file) - else: - self.arrange_transform(transforms=self.test_transforms, mode='test') - im, im_info = self.test_transforms(im_file) - im = im.astype(np.float32) - im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) - pred = result[0] - logit = result[1] - logit = np.squeeze(logit) - logit = np.transpose(logit, (1, 2, 0)) - pred = np.squeeze(pred).astype('uint8') - keys = list(im_info.keys()) - for k in keys[::-1]: - if k == 'shape_before_resize': - h, w = im_info[k][0], im_info[k][1] - pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) - logit = cv2.resize(logit, (w, h), cv2.INTER_LINEAR) - elif k == 'shape_before_padding': - h, w = im_info[k][0], im_info[k][1] - pred = pred[0:h, 0:w] - logit = logit[0:h, 0:w, :] - - return {'label_map': pred, 'score_map': logit} diff --git a/legacy/contrib/RemoteSensing/models/hrnet.py b/legacy/contrib/RemoteSensing/models/hrnet.py deleted file mode 100644 index 65663386c9..0000000000 --- a/legacy/contrib/RemoteSensing/models/hrnet.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
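# A minimal, illustrative inference flow with this package's model API (the image
# path below is an assumed example, not a shipped asset): load_model() from
# models/load_model.py rebuilds the network from model.yml and restores its
# weights, and BaseModel.predict() returns a dict holding 'label_map'
# (uint8 class ids) and 'score_map' ((h, w, num_classes) probabilities).
#
#     from models import load_model
#
#     model = load_model("saved_model/remote_sensing_unet/best_model")
#     result = model.predict("dataset/remote_sensing_seg/data/some_tile.npy")
#     label_map = result["label_map"]   # (h, w) class indices
#     score_map = result["score_map"]   # (h, w, num_classes) probabilities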
- -from __future__ import absolute_import -import paddle.fluid as fluid -import os -from os import path as osp -import numpy as np -from collections import OrderedDict -import copy -import math -import time -import tqdm -import cv2 -import yaml -import utils -import utils.logging as logging -from utils.utils import seconds_to_hms, get_environ_info -from utils.metrics import ConfusionMatrix -import nets -import transforms.transforms as T -from .base import BaseModel - - -def dict2str(dict_input): - out = '' - for k, v in dict_input.items(): - try: - v = round(float(v), 6) - except: - pass - out = out + '{}={}, '.format(k, v) - return out.strip(', ') - - -class HRNet(BaseModel): - def __init__(self, - num_classes=2, - input_channel=3, - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[18, 36], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[18, 36, 72], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[18, 36, 72, 144], - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - super().__init__( - num_classes=num_classes, - use_bce_loss=use_bce_loss, - use_dice_loss=use_dice_loss, - class_weight=class_weight, - ignore_index=ignore_index, - sync_bn=sync_bn) - self.init_params = locals() - self.input_channel = input_channel - self.stage1_num_modules = stage1_num_modules - self.stage1_num_blocks = stage1_num_blocks - self.stage1_num_channels = stage1_num_channels - self.stage2_num_modules = stage2_num_modules - self.stage2_num_blocks = stage2_num_blocks - self.stage2_num_channels = stage2_num_channels - self.stage3_num_modules = stage3_num_modules - self.stage3_num_blocks = stage3_num_blocks - self.stage3_num_channels = stage3_num_channels - self.stage4_num_modules = stage4_num_modules - self.stage4_num_blocks = stage4_num_blocks - self.stage4_num_channels = stage4_num_channels - - def build_net(self, mode='train'): - """应根据不同的情况进行构建""" - model = nets.HRNet( - self.num_classes, - self.input_channel, - mode=mode, - stage1_num_modules=self.stage1_num_modules, - stage1_num_blocks=self.stage1_num_blocks, - stage1_num_channels=self.stage1_num_channels, - stage2_num_modules=self.stage2_num_modules, - stage2_num_blocks=self.stage2_num_blocks, - stage2_num_channels=self.stage2_num_channels, - stage3_num_modules=self.stage3_num_modules, - stage3_num_blocks=self.stage3_num_blocks, - stage3_num_channels=self.stage3_num_channels, - stage4_num_modules=self.stage4_num_modules, - stage4_num_blocks=self.stage4_num_blocks, - stage4_num_channels=self.stage4_num_channels, - use_bce_loss=self.use_bce_loss, - use_dice_loss=self.use_dice_loss, - class_weight=self.class_weight, - ignore_index=self.ignore_index) - inputs = model.generate_inputs() - model_out = model.build_net(inputs) - outputs = OrderedDict() - if mode == 'train': - self.optimizer.minimize(model_out) - outputs['loss'] = model_out - else: - outputs['pred'] = model_out[0] - outputs['logit'] = model_out[1] - return inputs, outputs - - def train(self, - num_epochs, - train_reader, - train_batch_size=2, - eval_reader=None, - eval_best_metric='miou', - save_interval_epochs=1, - log_interval_steps=2, - save_dir='output', - pretrain_weights=None, - resume_weights=None, - optimizer=None, - learning_rate=0.01, - lr_decay_power=0.9, - regularization_coeff=5e-4, - use_vdl=False): - super().train( - num_epochs=num_epochs, - train_reader=train_reader, - 
train_batch_size=train_batch_size, - eval_reader=eval_reader, - eval_best_metric=eval_best_metric, - save_interval_epochs=save_interval_epochs, - log_interval_steps=log_interval_steps, - save_dir=save_dir, - pretrain_weights=pretrain_weights, - resume_weights=resume_weights, - optimizer=optimizer, - learning_rate=learning_rate, - lr_decay_power=lr_decay_power, - regularization_coeff=regularization_coeff, - use_vdl=use_vdl) diff --git a/legacy/contrib/RemoteSensing/models/load_model.py b/legacy/contrib/RemoteSensing/models/load_model.py deleted file mode 100644 index 1fcf22b072..0000000000 --- a/legacy/contrib/RemoteSensing/models/load_model.py +++ /dev/null @@ -1,89 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import yaml -import os.path as osp -import six -import copy -from collections import OrderedDict -import paddle.fluid as fluid -from paddle.fluid.framework import Parameter -from utils import logging -import models - - -def load_model(model_dir): - if not osp.exists(osp.join(model_dir, "model.yml")): - raise Exception("There's no model.yml in {}".format(model_dir)) - with open(osp.join(model_dir, "model.yml")) as f: - info = yaml.load(f.read(), Loader=yaml.Loader) - status = info['status'] - - if not hasattr(models, info['Model']): - raise Exception("There's no attribute {} in models".format( - info['Model'])) - - model = getattr(models, info['Model'])(**info['_init_params']) - if status == "Normal": - startup_prog = fluid.Program() - model.test_prog = fluid.Program() - with fluid.program_guard(model.test_prog, startup_prog): - with fluid.unique_name.guard(): - model.test_inputs, model.test_outputs = model.build_net( - mode='test') - model.test_prog = model.test_prog.clone(for_test=True) - model.exe.run(startup_prog) - import pickle - with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: - load_dict = pickle.load(f) - fluid.io.set_program_state(model.test_prog, load_dict) - - elif status == "Infer": - [prog, input_names, outputs] = fluid.io.load_inference_model( - model_dir, model.exe, params_filename='__params__') - model.test_prog = prog - test_outputs_info = info['_ModelInputsOutputs']['test_outputs'] - model.test_inputs = OrderedDict() - model.test_outputs = OrderedDict() - for name in input_names: - model.test_inputs[name] = model.test_prog.global_block().var(name) - for i, out in enumerate(outputs): - var_desc = test_outputs_info[i] - model.test_outputs[var_desc[0]] = out - if 'test_transforms' in info: - model.test_transforms = build_transforms(info['test_transforms']) - model.eval_transforms = copy.deepcopy(model.test_transforms) - - if '_Attributes' in info: - for k, v in info['_Attributes'].items(): - if k in model.__dict__: - model.__dict__[k] = v - - logging.info("Model[{}] loaded.".format(info['Model'])) - return model - - -def build_transforms(transforms_info): - from transforms import transforms as T - transforms = list() - for op_info in transforms_info: - op_name = 
list(op_info.keys())[0] - op_attr = op_info[op_name] - if not hasattr(T, op_name): - raise Exception( - "There's no operator named '{}' in transforms".format(op_name)) - transforms.append(getattr(T, op_name)(**op_attr)) - eval_transforms = T.Compose(transforms) - return eval_transforms diff --git a/legacy/contrib/RemoteSensing/models/unet.py b/legacy/contrib/RemoteSensing/models/unet.py deleted file mode 100644 index 3ae0b780a5..0000000000 --- a/legacy/contrib/RemoteSensing/models/unet.py +++ /dev/null @@ -1,292 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -import numpy as np -import math -import cv2 -import paddle.fluid as fluid -import utils.logging as logging -from collections import OrderedDict -from .base import BaseModel -from utils.metrics import ConfusionMatrix -import nets - - -class UNet(BaseModel): - """实现UNet网络的构建并进行训练、评估、预测和模型导出。 - - Args: - num_classes (int): 类别数。 - upsample_mode (str): UNet decode时采用的上采样方式,取值为'bilinear'时利用双线行差值进行上菜样, - 当输入其他选项时则利用反卷积进行上菜样,默认为'bilinear'。 - use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。默认False。 - use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 - 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。默认False。 - class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 - num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 - 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1, - 即平时使用的交叉熵损失函数。 - ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。默认255。 - - Raises: - ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。 - ValueError: class_weight为list, 但长度不等于num_class。 - class_weight为str, 但class_weight.low()不等于dynamic。 - TypeError: class_weight不为None时,其类型不是list或str。 - """ - - def __init__(self, - num_classes=2, - upsample_mode='bilinear', - input_channel=3, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255, - sync_bn=True): - super().__init__( - num_classes=num_classes, - use_bce_loss=use_bce_loss, - use_dice_loss=use_dice_loss, - class_weight=class_weight, - ignore_index=ignore_index, - sync_bn=sync_bn) - self.init_params = locals() - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. 
- format(type(class_weight))) - self.num_classes = num_classes - self.upsample_mode = upsample_mode - self.input_channel = input_channel - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.labels = None - # 若模型是从inference model加载进来的,无法调用训练接口进行训练 - self.trainable = True - - def build_net(self, mode='train'): - model = nets.UNet( - self.num_classes, - mode=mode, - upsample_mode=self.upsample_mode, - input_channel=self.input_channel, - use_bce_loss=self.use_bce_loss, - use_dice_loss=self.use_dice_loss, - class_weight=self.class_weight, - ignore_index=self.ignore_index) - inputs = model.generate_inputs() - model_out = model.build_net(inputs) - outputs = OrderedDict() - if mode == 'train': - self.optimizer.minimize(model_out) - outputs['loss'] = model_out - elif mode == 'eval': - outputs['loss'] = model_out[0] - outputs['pred'] = model_out[1] - outputs['label'] = model_out[2] - outputs['mask'] = model_out[3] - else: - outputs['pred'] = model_out[0] - outputs['logit'] = model_out[1] - return inputs, outputs - - def train(self, - num_epochs, - train_reader, - train_batch_size=2, - eval_reader=None, - eval_best_metric='miou', - save_interval_epochs=1, - log_interval_steps=2, - save_dir='output', - pretrain_weights=None, - resume_weights=None, - optimizer=None, - learning_rate=0.01, - lr_decay_power=0.9, - regularization_coeff=5e-4, - use_vdl=False): - """训练。 - - Args: - num_epochs (int): 训练迭代轮数。 - train_reader (readers): 训练数据读取器。 - train_batch_size (int): 训练数据batch大小。同时作为验证数据batch大小。默认2。 - eval_reader (readers): 边训边评估的评估数据读取器。 - eval_best_metric (str): 边训边评估保存最好模型的指标。默认为'kappa'。 - save_interval_epochs (int): 模型保存间隔(单位:迭代轮数)。默认为1。 - log_interval_steps (int): 训练日志输出间隔(单位:迭代次数)。默认为2。 - save_dir (str): 模型保存路径。默认'output'。 - pretrain_weights (str): 若指定为路径时,则加载路径下预训练模型;若为None,则不使用预训练模型。 - optimizer (paddle.fluid.optimizer): 优化器。当改参数为None时,使用默认的优化器:使用 - fluid.optimizer.Momentum优化方法,polynomial的学习率衰减策略。 - learning_rate (float): 默认优化器的初始学习率。默认0.01。 - lr_decay_power (float): 默认优化器学习率多项式衰减系数。默认0.9。 - use_vdl (bool): 是否使用VisualDL进行可视化。默认False。 - - Raises: - ValueError: 模型从inference model进行加载。 - """ - super().train( - num_epochs=num_epochs, - train_reader=train_reader, - train_batch_size=train_batch_size, - eval_reader=eval_reader, - eval_best_metric=eval_best_metric, - save_interval_epochs=save_interval_epochs, - log_interval_steps=log_interval_steps, - save_dir=save_dir, - pretrain_weights=pretrain_weights, - resume_weights=resume_weights, - optimizer=optimizer, - learning_rate=learning_rate, - lr_decay_power=lr_decay_power, - regularization_coeff=regularization_coeff, - use_vdl=use_vdl) - - def evaluate(self, - eval_reader, - batch_size=1, - verbose=True, - epoch_id=None, - return_details=False): - """评估。 - - Args: - eval_reader (readers): 评估数据读取器。 - batch_size (int): 评估时的batch大小。默认1。 - verbose (bool): 是否打印日志。默认True。 - epoch_id (int): 当前评估模型所在的训练轮数。 - return_details (bool): 是否返回详细信息。默认False。 - - Returns: - dict: 当return_details为False时,返回dict。包含关键字:'miou'、'category_iou'、'macc'、 - 'category_acc'和'kappa',分别表示平均iou、各类别iou、平均准确率、各类别准确率和kappa系数。 - tuple (metrics, eval_details):当return_details为True时,增加返回dict (eval_details), - 包含关键字:'confusion_matrix',表示评估的混淆矩阵。 - """ - self.arrange_transform(transforms=eval_reader.transforms, mode='eval') - total_steps = math.ceil(eval_reader.num_samples * 1.0 / batch_size) - conf_mat = ConfusionMatrix(self.num_classes, streaming=True) - data_generator = eval_reader.generator( - 
batch_size=batch_size, drop_last=False) - if not hasattr(self, 'parallel_test_prog'): - self.parallel_test_prog = fluid.CompiledProgram( - self.test_prog).with_data_parallel( - share_vars_from=self.parallel_train_prog) - batch_size_each_gpu = self._get_single_card_bs(batch_size) - - for step, data in enumerate(data_generator()): - images = np.array([d[0] for d in data]) - images = images.astype(np.float32) - - labels = np.array([d[1] for d in data]) - num_samples = images.shape[0] - if num_samples < batch_size: - num_pad_samples = batch_size - num_samples - pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1)) - images = np.concatenate([images, pad_images]) - feed_data = {'image': images} - outputs = self.exe.run( - self.parallel_test_prog, - feed=feed_data, - fetch_list=list(self.test_outputs.values()), - return_numpy=True) - pred = outputs[0] - if num_samples < batch_size: - pred = pred[0:num_samples] - - mask = labels != self.ignore_index - conf_mat.calculate(pred=pred, label=labels, ignore=mask) - _, iou = conf_mat.mean_iou() - - if verbose: - logging.info("[EVAL] Epoch={}, Step={}/{}, iou={}".format( - epoch_id, step + 1, total_steps, iou)) - - category_iou, miou = conf_mat.mean_iou() - category_acc, macc = conf_mat.accuracy() - precision, recall = conf_mat.precision_recall() - - metrics = OrderedDict( - zip([ - 'miou', 'category_iou', 'macc', 'category_acc', 'kappa', - 'precision', 'recall' - ], [ - miou, category_iou, macc, category_acc, - conf_mat.kappa(), precision, recall - ])) - if return_details: - eval_details = { - 'confusion_matrix': conf_mat.confusion_matrix.tolist() - } - return metrics, eval_details - return metrics - - def predict(self, im_file, transforms=None): - """预测。 - Args: - img_file(str): 预测图像路径。 - transforms(transforms): 数据预处理操作。 - - Returns: - np.ndarray: 预测结果灰度图。 - """ - if transforms is None and not hasattr(self, 'test_transforms'): - raise Exception("transforms need to be defined, now is None.") - if transforms is not None: - self.arrange_transform(transforms=transforms, mode='test') - im, im_info = transforms(im_file) - else: - self.arrange_transform(transforms=self.test_transforms, mode='test') - im, im_info = self.test_transforms(im_file) - im = im.astype(np.float32) - im = np.expand_dims(im, axis=0) - result = self.exe.run( - self.test_prog, - feed={'image': im}, - fetch_list=list(self.test_outputs.values())) - pred = result[0] - pred = np.squeeze(pred).astype(np.uint8) - keys = list(im_info.keys()) - for k in keys[::-1]: - if k == 'shape_before_resize': - h, w = im_info[k][0], im_info[k][1] - pred = cv2.resize(pred, (w, h), cv2.INTER_NEAREST) - elif k == 'shape_before_padding': - h, w = im_info[k][0], im_info[k][1] - pred = pred[0:h, 0:w] - - return {'label_map': pred} diff --git a/legacy/contrib/RemoteSensing/models/utils/visualize.py b/legacy/contrib/RemoteSensing/models/utils/visualize.py deleted file mode 100644 index a47a756f13..0000000000 --- a/legacy/contrib/RemoteSensing/models/utils/visualize.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import os.path as osp -import numpy as np -from PIL import Image as Image - - -def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. 
- Args: - num_classes: Number of classes - Returns: - The color map - """ - color_map = num_classes * [0, 0, 0] - for i in range(0, num_classes): - j = 0 - lab = i - while lab: - color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j)) - color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)) - color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)) - j += 1 - lab >>= 3 - - return color_map - - -def splice_imgs(img_list, vis_path): - """Splice pictures horizontally - """ - IMAGE_WIDTH, IMAGE_HEIGHT = img_list[0].size - padding_width = 20 - img_num = len(img_list) - to_image = Image.new('RGB', - (img_num * IMAGE_WIDTH + (img_num - 1) * padding_width, - IMAGE_HEIGHT)) # Create a new picture - padding = Image.new('RGB', (padding_width, IMAGE_HEIGHT), (255, 255, 255)) - - # Loop through, paste each picture to the corresponding position in order - for i, from_image in enumerate(img_list): - to_image.paste(from_image, (i * (IMAGE_WIDTH + padding_width), 0)) - if i < img_num - 1: - to_image.paste(padding, - (i * (IMAGE_WIDTH + padding_width) + IMAGE_WIDTH, 0)) - return to_image.save(vis_path) diff --git a/legacy/contrib/RemoteSensing/nets/__init__.py b/legacy/contrib/RemoteSensing/nets/__init__.py deleted file mode 100644 index 381f327fe5..0000000000 --- a/legacy/contrib/RemoteSensing/nets/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .unet import UNet -from .hrnet import HRNet diff --git a/legacy/contrib/RemoteSensing/nets/hrnet.py b/legacy/contrib/RemoteSensing/nets/hrnet.py deleted file mode 100644 index 32d613e634..0000000000 --- a/legacy/contrib/RemoteSensing/nets/hrnet.py +++ /dev/null @@ -1,455 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
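The `get_color_map_list` helper removed above (in `models/utils/visualize.py`) builds a pseudo-color palette by scattering the bits of each class index across the R, G and B channels, and the removed `predict_demo.py` further down in this patch attaches that palette with `Image.putpalette` when saving predictions as indexed PNGs. A minimal, self-contained sketch of the same idea, assuming only `numpy` and `Pillow`; the label map here is synthetic:

```python
# Illustrative sketch: generate the bit-scattered palette and save a label
# map as a pseudo-color (palette-mode) PNG, mirroring the removed helpers.
import numpy as np
from PIL import Image


def get_color_map_list(num_classes):
    # Spread bits 0/1/2 of the class id into the high bits of R/G/B so that
    # consecutive class ids map to visually distinct colors.
    color_map = num_classes * [0, 0, 0]
    for i in range(num_classes):
        j, lab = 0, i
        while lab:
            color_map[i * 3] |= ((lab >> 0) & 1) << (7 - j)
            color_map[i * 3 + 1] |= ((lab >> 1) & 1) << (7 - j)
            color_map[i * 3 + 2] |= ((lab >> 2) & 1) << (7 - j)
            j += 1
            lab >>= 3
    return color_map


# Hypothetical 64x64 prediction with three classes (0 = background).
label_map = np.random.randint(0, 3, size=(64, 64), dtype=np.uint8)
mask = Image.fromarray(label_map, mode="P")
mask.putpalette(get_color_map_list(256))
mask.save("pred_pseudo_color.png")
```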
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from .loss import softmax_with_loss -from .loss import dice_loss -from .loss import bce_loss -from .libs import sigmoid_to_softmax - - -class HRNet(object): - def __init__(self, - num_classes, - input_channel=3, - mode='train', - stage1_num_modules=1, - stage1_num_blocks=[4], - stage1_num_channels=[64], - stage2_num_modules=1, - stage2_num_blocks=[4, 4], - stage2_num_channels=[18, 36], - stage3_num_modules=4, - stage3_num_blocks=[4, 4, 4], - stage3_num_channels=[18, 36, 72], - stage4_num_modules=3, - stage4_num_blocks=[4, 4, 4, 4], - stage4_num_channels=[18, 36, 72, 144], - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255): - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise ValueError( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. - format(type(class_weight))) - - self.num_classes = num_classes - self.input_channel = input_channel - self.mode = mode - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - self.stage1_num_modules = stage1_num_modules - self.stage1_num_blocks = stage1_num_blocks - self.stage1_num_channels = stage1_num_channels - self.stage2_num_modules = stage2_num_modules - self.stage2_num_blocks = stage2_num_blocks - self.stage2_num_channels = stage2_num_channels - self.stage3_num_modules = stage3_num_modules - self.stage3_num_blocks = stage3_num_blocks - self.stage3_num_channels = stage3_num_channels - self.stage4_num_modules = stage4_num_modules - self.stage4_num_blocks = stage4_num_blocks - self.stage4_num_channels = stage4_num_channels - - def build_net(self, inputs): - if self.use_dice_loss or self.use_bce_loss: - self.num_classes = 1 - image = inputs['image'] - logit = self._high_resolution_net(image, self.num_classes) - if self.num_classes == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if self.mode == 'train': - label = inputs['label'] - mask = label != self.ignore_index - return self._get_loss(logit, label, mask) - - else: - if self.num_classes == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = fluid.layers.softmax(logit, axis=1) - return pred, logit - - return logit - - def generate_inputs(self): - inputs = OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', - shape=[None, self.input_channel, None, None], - name='image') - if self.mode == 'train': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - 
return inputs - - def _get_loss(self, logit, label, mask): - avg_loss = 0 - if not (self.use_dice_loss or self.use_bce_loss): - avg_loss += softmax_with_loss( - logit, - label, - mask, - num_classes=self.num_classes, - weight=self.class_weight, - ignore_index=self.ignore_index) - else: - if self.use_dice_loss: - avg_loss += dice_loss(logit, label, mask) - if self.use_bce_loss: - avg_loss += bce_loss( - logit, label, mask, ignore_index=self.ignore_index) - - return avg_loss - - def _conv_bn_layer(self, - input, - filter_size, - num_filters, - stride=1, - padding=1, - num_groups=1, - if_act=True, - name=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=num_groups, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr( - name=bn_name + "_scale", - initializer=fluid.initializer.Constant(1.0)), - bias_attr=ParamAttr( - name=bn_name + "_offset", - initializer=fluid.initializer.Constant(0.0)), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - bn = fluid.layers.relu(bn) - return bn - - def _basic_block(self, - input, - num_filters, - stride=1, - downsample=False, - name=None): - residual = input - conv = self._conv_bn_layer( - input=input, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv1') - conv = self._conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - if_act=False, - name=name + '_conv2') - if downsample: - residual = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - def _bottleneck_block(self, - input, - num_filters, - stride=1, - downsample=False, - name=None): - residual = input - conv = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters, - name=name + '_conv1') - conv = self._conv_bn_layer( - input=conv, - filter_size=3, - num_filters=num_filters, - stride=stride, - name=name + '_conv2') - conv = self._conv_bn_layer( - input=conv, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_conv3') - if downsample: - residual = self._conv_bn_layer( - input=input, - filter_size=1, - num_filters=num_filters * 4, - if_act=False, - name=name + '_downsample') - return fluid.layers.elementwise_add(x=residual, y=conv, act='relu') - - def _fuse_layers(self, x, channels, multi_scale_output=True, name=None): - out = [] - for i in range(len(channels) if multi_scale_output else 1): - residual = x[i] - shape = fluid.layers.shape(residual)[-2:] - for j in range(len(channels)): - if j > i: - y = self._conv_bn_layer( - x[j], - filter_size=1, - num_filters=channels[i], - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + str(j + 1)) - y = fluid.layers.resize_bilinear(input=y, out_shape=shape) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - elif j < i: - y = x[j] - for k in range(i - j): - if k == i - j - 1: - y = self._conv_bn_layer( - y, - filter_size=3, - num_filters=channels[i], - stride=2, - if_act=False, - name=name + '_layer_' + str(i + 1) + '_' + - str(j + 1) + '_' + str(k + 1)) - else: - y = self._conv_bn_layer( - y, - filter_size=3, - num_filters=channels[j], - stride=2, - name=name + '_layer_' + str(i + 1) + '_' + - 
str(j + 1) + '_' + str(k + 1)) - residual = fluid.layers.elementwise_add( - x=residual, y=y, act=None) - - residual = fluid.layers.relu(residual) - out.append(residual) - return out - - def _branches(self, x, block_num, channels, name=None): - out = [] - for i in range(len(channels)): - residual = x[i] - for j in range(block_num[i]): - residual = self._basic_block( - residual, - channels[i], - name=name + '_branch_layer_' + str(i + 1) + '_' + - str(j + 1)) - out.append(residual) - return out - - def _high_resolution_module(self, - x, - blocks, - channels, - multi_scale_output=True, - name=None): - residual = self._branches(x, blocks, channels, name=name) - out = self._fuse_layers( - residual, - channels, - multi_scale_output=multi_scale_output, - name=name) - return out - - def _transition_layer(self, x, in_channels, out_channels, name=None): - num_in = len(in_channels) - num_out = len(out_channels) - out = [] - for i in range(num_out): - if i < num_in: - if in_channels[i] != out_channels[i]: - residual = self._conv_bn_layer( - x[i], - filter_size=3, - num_filters=out_channels[i], - name=name + '_layer_' + str(i + 1)) - out.append(residual) - else: - out.append(x[i]) - else: - residual = self._conv_bn_layer( - x[-1], - filter_size=3, - num_filters=out_channels[i], - stride=2, - name=name + '_layer_' + str(i + 1)) - out.append(residual) - return out - - def _stage(self, - x, - num_modules, - num_blocks, - num_channels, - multi_scale_output=True, - name=None): - out = x - for i in range(num_modules): - if i == num_modules - 1 and multi_scale_output == False: - out = self._high_resolution_module( - out, - num_blocks, - num_channels, - multi_scale_output=False, - name=name + '_' + str(i + 1)) - else: - out = self._high_resolution_module( - out, num_blocks, num_channels, name=name + '_' + str(i + 1)) - - return out - - def _layer1(self, input, num_modules, num_blocks, num_channels, name=None): - # num_modules 默认为1,是否增加处理,官网实现为[1],是否对齐。 - conv = input - for i in range(num_blocks[0]): - conv = self._bottleneck_block( - conv, - num_filters=num_channels[0], - downsample=True if i == 0 else False, - name=name + '_' + str(i + 1)) - return conv - - def _high_resolution_net(self, input, num_classes): - x = self._conv_bn_layer( - input=input, - filter_size=3, - num_filters=self.stage1_num_channels[0], - stride=2, - if_act=True, - name='layer1_1') - x = self._conv_bn_layer( - input=x, - filter_size=3, - num_filters=self.stage1_num_channels[0], - stride=2, - if_act=True, - name='layer1_2') - - la1 = self._layer1( - x, - self.stage1_num_modules, - self.stage1_num_blocks, - self.stage1_num_channels, - name='layer2') - tr1 = self._transition_layer([la1], - self.stage1_num_channels, - self.stage2_num_channels, - name='tr1') - st2 = self._stage( - tr1, - self.stage2_num_modules, - self.stage2_num_blocks, - self.stage2_num_channels, - name='st2') - tr2 = self._transition_layer( - st2, self.stage2_num_channels, self.stage3_num_channels, name='tr2') - st3 = self._stage( - tr2, - self.stage3_num_modules, - self.stage3_num_blocks, - self.stage3_num_channels, - name='st3') - tr3 = self._transition_layer( - st3, self.stage3_num_channels, self.stage4_num_channels, name='tr3') - st4 = self._stage( - tr3, - self.stage4_num_modules, - self.stage4_num_blocks, - self.stage4_num_channels, - name='st4') - - # upsample - shape = fluid.layers.shape(st4[0])[-2:] - st4[1] = fluid.layers.resize_bilinear(st4[1], out_shape=shape) - st4[2] = fluid.layers.resize_bilinear(st4[2], out_shape=shape) - st4[3] = 
fluid.layers.resize_bilinear(st4[3], out_shape=shape) - - out = fluid.layers.concat(st4, axis=1) - last_channels = sum(self.stage4_num_channels) - - out = self._conv_bn_layer( - input=out, - filter_size=1, - num_filters=last_channels, - stride=1, - if_act=True, - name='conv-2') - out = fluid.layers.conv2d( - input=out, - num_filters=num_classes, - filter_size=1, - stride=1, - padding=0, - act=None, - param_attr=ParamAttr(initializer=MSRA(), name='conv-1_weights'), - bias_attr=False) - - input_shape = fluid.layers.shape(input)[-2:] - out = fluid.layers.resize_bilinear(out, input_shape) - - return out diff --git a/legacy/contrib/RemoteSensing/nets/libs.py b/legacy/contrib/RemoteSensing/nets/libs.py deleted file mode 100644 index e475e19719..0000000000 --- a/legacy/contrib/RemoteSensing/nets/libs.py +++ /dev/null @@ -1,219 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -import contextlib - -bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) -name_scope = "" - - -@contextlib.contextmanager -def scope(name): - global name_scope - bk = name_scope - name_scope = name_scope + name + '/' - yield - name_scope = bk - - -def max_pool(input, kernel, stride, padding): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='max', - pool_stride=stride, - pool_padding=padding) - return data - - -def avg_pool(input, kernel, stride, padding=0): - data = fluid.layers.pool2d( - input, - pool_size=kernel, - pool_type='avg', - pool_stride=stride, - pool_padding=padding) - return data - - -def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): - N, C, H, W = input.shape - if C % G != 0: - for d in range(10): - for t in [d, -d]: - if G + t <= 0: continue - if C % (G + t) == 0: - G = G + t - break - if C % G == 0: - break - assert C % G == 0, "group can not divide channle" - x = fluid.layers.group_norm( - input, - groups=G, - param_attr=param_attr, - bias_attr=bias_attr, - name=name_scope + 'group_norm') - return x - - -def bn(*args, - norm_type='bn', - eps=1e-5, - bn_momentum=0.99, - group_norm=32, - **kargs): - - if norm_type == 'bn': - with scope('BatchNorm'): - return fluid.layers.batch_norm( - *args, - epsilon=eps, - momentum=bn_momentum, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer), - moving_mean_name=name_scope + 'moving_mean', - moving_variance_name=name_scope + 'moving_variance', - **kargs) - elif norm_type == 'gn': - with scope('GroupNorm'): - return group_norm( - args[0], - group_norm, - eps=eps, - param_attr=fluid.ParamAttr( - name=name_scope + 'gamma', regularizer=bn_regularizer), - bias_attr=fluid.ParamAttr( - name=name_scope + 'beta', regularizer=bn_regularizer)) - else: - 
raise Exception("Unsupport norm type:" + norm_type) - - -def bn_relu(data, norm_type='bn', eps=1e-5): - return fluid.layers.relu(bn(data, norm_type=norm_type, eps=eps)) - - -def relu(data): - return fluid.layers.relu(data) - - -def conv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = fluid.ParamAttr( - name=name_scope + 'biases', - regularizer=None, - initializer=fluid.initializer.ConstantInitializer(value=0.0)) - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d(*args, **kargs) - - -def deconv(*args, **kargs): - kargs['param_attr'] = name_scope + 'weights' - if 'bias_attr' in kargs and kargs['bias_attr']: - kargs['bias_attr'] = name_scope + 'biases' - else: - kargs['bias_attr'] = False - return fluid.layers.conv2d_transpose(*args, **kargs) - - -def separate_conv(input, - channel, - stride, - filter, - dilation=1, - act=None, - eps=1e-5): - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) - with scope('depthwise'): - input = conv( - input, - input.shape[1], - filter, - stride, - groups=input.shape[1], - padding=(filter // 2) * dilation, - dilation=dilation, - use_cudnn=False, - param_attr=param_attr) - input = bn(input, eps=eps) - if act: input = act(input) - - param_attr = fluid.ParamAttr( - name=name_scope + 'weights', - regularizer=None, - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) - with scope('pointwise'): - input = conv( - input, channel, 1, 1, groups=1, padding=0, param_attr=param_attr) - input = bn(input, eps=eps) - if act: input = act(input) - return input - - -def conv_bn_layer(input, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - if_act=True, - name=None, - use_cudnn=True): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=fluid.ParamAttr(name=name + '_weights'), - bias_attr=False) - bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=fluid.ParamAttr(name=bn_name + "_scale"), - bias_attr=fluid.ParamAttr(name=bn_name + "_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if if_act: - return fluid.layers.relu6(bn) - else: - return bn - - -def sigmoid_to_softmax(input): - """ - one channel to two channel - """ - logit = fluid.layers.sigmoid(input) - logit_back = 1 - logit - logit = fluid.layers.concat([logit_back, logit], axis=1) - return logit diff --git a/legacy/contrib/RemoteSensing/nets/loss.py b/legacy/contrib/RemoteSensing/nets/loss.py deleted file mode 100644 index 3d80416fb8..0000000000 --- a/legacy/contrib/RemoteSensing/nets/loss.py +++ /dev/null @@ -1,117 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as fluid -import numpy as np - - -def softmax_with_loss(logit, - label, - ignore_mask=None, - num_classes=2, - weight=None, - ignore_index=255): - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - label = fluid.layers.elementwise_min( - label, fluid.layers.assign(np.array([num_classes - 1], dtype=np.int32))) - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - logit = fluid.layers.reshape(logit, [-1, num_classes]) - label = fluid.layers.reshape(label, [-1, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.reshape(ignore_mask, [-1, 1]) - if weight is None: - loss, probs = fluid.layers.softmax_with_cross_entropy( - logit, label, ignore_index=ignore_index, return_softmax=True) - else: - label = fluid.layers.squeeze(label, axes=[-1]) - label_one_hot = fluid.one_hot(input=label, depth=num_classes) - if isinstance(weight, list): - assert len( - weight - ) == num_classes, "weight length must equal num of classes" - weight = fluid.layers.assign(np.array([weight], dtype='float32')) - elif isinstance(weight, str): - assert weight.lower( - ) == 'dynamic', 'if weight is string, must be dynamic!' - tmp = [] - total_num = fluid.layers.cast( - fluid.layers.shape(label)[0], 'float32') - for i in range(num_classes): - cls_pixel_num = fluid.layers.reduce_sum(label_one_hot[:, i]) - ratio = total_num / (cls_pixel_num + 1) - tmp.append(ratio) - weight = fluid.layers.concat(tmp) - weight = weight / fluid.layers.reduce_sum(weight) * num_classes - elif isinstance(weight, fluid.layers.Variable): - pass - else: - raise ValueError( - 'Expect weight is a list, string or Variable, but receive {}'. - format(type(weight))) - weight = fluid.layers.reshape(weight, [1, num_classes]) - weighted_label_one_hot = fluid.layers.elementwise_mul( - label_one_hot, weight) - probs = fluid.layers.softmax(logit) - loss = fluid.layers.cross_entropy( - probs, - weighted_label_one_hot, - soft_label=True, - ignore_index=ignore_index) - weighted_label_one_hot.stop_gradient = True - - loss = loss * ignore_mask - avg_loss = fluid.layers.mean(loss) / ( - fluid.layers.mean(ignore_mask) + 0.00001) - - label.stop_gradient = True - ignore_mask.stop_gradient = True - return avg_loss - - -# to change, how to appicate ignore index and ignore mask -def dice_loss(logit, label, ignore_mask=None, epsilon=0.00001): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise Exception( - "dice loss is only applicable to one channel classfication") - ignore_mask = fluid.layers.cast(ignore_mask, 'float32') - logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) - label = fluid.layers.transpose(label, [0, 2, 3, 1]) - label = fluid.layers.cast(label, 'int64') - ignore_mask = fluid.layers.transpose(ignore_mask, [0, 2, 3, 1]) - logit = fluid.layers.sigmoid(logit) - logit = logit * ignore_mask - label = label * ignore_mask - reduce_dim = list(range(1, len(logit.shape))) - inse = fluid.layers.reduce_sum(logit * label, dim=reduce_dim) - dice_denominator = fluid.layers.reduce_sum( - logit, dim=reduce_dim) + fluid.layers.reduce_sum( - label, dim=reduce_dim) - dice_score = 1 - inse * 2 / (dice_denominator + epsilon) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return fluid.layers.reduce_mean(dice_score) - - -def bce_loss(logit, label, ignore_mask=None, ignore_index=255): - if logit.shape[1] != 1 or label.shape[1] != 1 or ignore_mask.shape[1] != 1: - raise 
Exception("bce loss is only applicable to binary classfication") - label = fluid.layers.cast(label, 'float32') - loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=logit, label=label, ignore_index=ignore_index, - normalize=True) # or False - loss = fluid.layers.reduce_sum(loss) - label.stop_gradient = True - ignore_mask.stop_gradient = True - return loss diff --git a/legacy/contrib/RemoteSensing/nets/unet.py b/legacy/contrib/RemoteSensing/nets/unet.py deleted file mode 100644 index 0574f0f582..0000000000 --- a/legacy/contrib/RemoteSensing/nets/unet.py +++ /dev/null @@ -1,268 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import OrderedDict - -import paddle.fluid as fluid -from .libs import scope, name_scope -from .libs import bn, bn_relu, relu -from .libs import conv, max_pool, deconv -from .libs import sigmoid_to_softmax -from .loss import softmax_with_loss -from .loss import dice_loss -from .loss import bce_loss - - -class UNet(object): - """实现Unet模型 - `"U-Net: Convolutional Networks for Biomedical Image Segmentation" - ` - - Args: - num_classes (int): 类别数 - mode (str): 网络运行模式,根据mode构建网络的输入和返回。 - 当mode为'train'时,输入为image(-1, 3, -1, -1)和label (-1, 1, -1, -1) 返回loss。 - 当mode为'train'时,输入为image (-1, 3, -1, -1)和label (-1, 1, -1, -1),返回loss, - pred (与网络输入label 相同大小的预测结果,值代表相应的类别),label,mask(非忽略值的mask, - 与label相同大小,bool类型)。 - 当mode为'test'时,输入为image(-1, 3, -1, -1)返回pred (-1, 1, -1, -1)和 - logit (-1, num_classes, -1, -1) 通道维上代表每一类的概率值。 - upsample_mode (str): UNet decode时采用的上采样方式,取值为'bilinear'时利用双线行差值进行上菜样, - 当输入其他选项时则利用反卷积进行上菜样,默认为'bilinear'。 - use_bce_loss (bool): 是否使用bce loss作为网络的损失函数,只能用于两类分割。可与dice loss同时使用。 - use_dice_loss (bool): 是否使用dice loss作为网络的损失函数,只能用于两类分割,可与bce loss同时使用。 - 当use_bce_loss和use_dice_loss都为False时,使用交叉熵损失函数。 - class_weight (list/str): 交叉熵损失函数各类损失的权重。当class_weight为list的时候,长度应为 - num_classes。当class_weight为str时, weight.lower()应为'dynamic',这时会根据每一轮各类像素的比重 - 自行计算相应的权重,每一类的权重为:每类的比例 * num_classes。class_weight取默认值None是,各类的权重1, - 即平时使用的交叉熵损失函数。 - ignore_index (int): label上忽略的值,label为ignore_index的像素不参与损失函数的计算。 - - Raises: - ValueError: use_bce_loss或use_dice_loss为真且num_calsses > 2。 - ValueError: class_weight为list, 但长度不等于num_class。 - class_weight为str, 但class_weight.low()不等于dynamic。 - TypeError: class_weight不为None时,其类型不是list或str。 - """ - - def __init__(self, - num_classes, - mode='train', - upsample_mode='bilinear', - input_channel=3, - use_bce_loss=False, - use_dice_loss=False, - class_weight=None, - ignore_index=255): - # dice_loss或bce_loss只适用两类分割中 - if num_classes > 2 and (use_bce_loss or use_dice_loss): - raise Exception( - "dice loss and bce loss is only applicable to binary classfication" - ) - - if class_weight is not None: - if isinstance(class_weight, list): - if len(class_weight) != num_classes: - raise ValueError( - "Length of class_weight should be 
equal to number of classes" - ) - elif isinstance(class_weight, str): - if class_weight.lower() != 'dynamic': - raise ValueError( - "if class_weight is string, must be dynamic!") - else: - raise TypeError( - 'Expect class_weight is a list or string but receive {}'. - format(type(class_weight))) - self.num_classes = num_classes - self.mode = mode - self.upsample_mode = upsample_mode - self.input_channel = input_channel - self.use_bce_loss = use_bce_loss - self.use_dice_loss = use_dice_loss - self.class_weight = class_weight - self.ignore_index = ignore_index - - def _double_conv(self, data, out_ch): - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) - with scope("conv0"): - data = bn_relu( - conv( - data, out_ch, 3, stride=1, padding=1, - param_attr=param_attr)) - with scope("conv1"): - data = bn_relu( - conv( - data, out_ch, 3, stride=1, padding=1, - param_attr=param_attr)) - return data - - def _down(self, data, out_ch): - # 下采样:max_pool + 2个卷积 - with scope("down"): - data = max_pool(data, 2, 2, 0) - data = self._double_conv(data, out_ch) - return data - - def _up(self, data, short_cut, out_ch): - # 上采样:data上采样(resize或deconv), 并与short_cut concat - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.XavierInitializer(), - ) - with scope("up"): - if self.upsample_mode == 'bilinear': - short_cut_shape = fluid.layers.shape(short_cut) - data = fluid.layers.resize_bilinear(data, short_cut_shape[2:]) - else: - data = deconv( - data, - out_ch // 2, - filter_size=2, - stride=2, - padding=0, - param_attr=param_attr) - data = fluid.layers.concat([data, short_cut], axis=1) - data = self._double_conv(data, out_ch) - return data - - def _encode(self, data): - # 编码器设置 - short_cuts = [] - with scope("encode"): - with scope("block1"): - data = self._double_conv(data, 64) - short_cuts.append(data) - with scope("block2"): - data = self._down(data, 128) - short_cuts.append(data) - with scope("block3"): - data = self._down(data, 256) - short_cuts.append(data) - with scope("block4"): - data = self._down(data, 512) - short_cuts.append(data) - with scope("block5"): - data = self._down(data, 512) - return data, short_cuts - - def _decode(self, data, short_cuts): - # 解码器设置,与编码器对称 - with scope("decode"): - with scope("decode1"): - data = self._up(data, short_cuts[3], 256) - with scope("decode2"): - data = self._up(data, short_cuts[2], 128) - with scope("decode3"): - data = self._up(data, short_cuts[1], 64) - with scope("decode4"): - data = self._up(data, short_cuts[0], 64) - return data - - def _get_logit(self, data, num_classes): - # 根据类别数设置最后一个卷积层输出 - param_attr = fluid.ParamAttr( - name='weights', - regularizer=fluid.regularizer.L2DecayRegularizer( - regularization_coeff=0.0), - initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)) - with scope("logit"): - data = conv( - data, - num_classes, - 3, - stride=1, - padding=1, - param_attr=param_attr) - return data - - def _get_loss(self, logit, label, mask): - avg_loss = 0 - if not (self.use_dice_loss or self.use_bce_loss): - avg_loss += softmax_with_loss( - logit, - label, - mask, - num_classes=self.num_classes, - weight=self.class_weight, - ignore_index=self.ignore_index) - else: - if self.use_dice_loss: - avg_loss += dice_loss(logit, label, mask) - if self.use_bce_loss: - avg_loss += bce_loss( - 
logit, label, mask, ignore_index=self.ignore_index) - - return avg_loss - - def generate_inputs(self): - inputs = OrderedDict() - inputs['image'] = fluid.data( - dtype='float32', - shape=[None, self.input_channel, None, None], - name='image') - if self.mode == 'train': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - elif self.mode == 'eval': - inputs['label'] = fluid.data( - dtype='int32', shape=[None, 1, None, None], name='label') - return inputs - - def build_net(self, inputs): - # 在两类分割情况下,当loss函数选择dice_loss或bce_loss的时候,最后logit输出通道数设置为1 - if self.use_dice_loss or self.use_bce_loss: - self.num_classes = 1 - - image = inputs['image'] - encode_data, short_cuts = self._encode(image) - decode_data = self._decode(encode_data, short_cuts) - logit = self._get_logit(decode_data, self.num_classes) - - if self.num_classes == 1: - out = sigmoid_to_softmax(logit) - out = fluid.layers.transpose(out, [0, 2, 3, 1]) - else: - out = fluid.layers.transpose(logit, [0, 2, 3, 1]) - - pred = fluid.layers.argmax(out, axis=3) - pred = fluid.layers.unsqueeze(pred, axes=[3]) - - if self.mode == 'train': - label = inputs['label'] - mask = label != self.ignore_index - return self._get_loss(logit, label, mask) - - elif self.mode == 'eval': - label = inputs['label'] - mask = label != self.ignore_index - loss = self._get_loss(logit, label, mask) - return loss, pred, label, mask - else: - if self.num_classes == 1: - logit = sigmoid_to_softmax(logit) - else: - logit = fluid.layers.softmax(logit, axis=1) - return pred, logit diff --git a/legacy/contrib/RemoteSensing/predict_demo.py b/legacy/contrib/RemoteSensing/predict_demo.py deleted file mode 100644 index bd80f57ed5..0000000000 --- a/legacy/contrib/RemoteSensing/predict_demo.py +++ /dev/null @@ -1,118 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
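The removed `sigmoid_to_softmax` helper in `nets/libs.py` converts the single logit channel produced when dice/BCE loss is enabled into a two-channel probability map, which the removed `build_net` methods then transpose and argmax into the final label map. A small NumPy rendering of that conversion, purely illustrative rather than the fluid implementation; shapes and values are made up:

```python
# Sketch of the one-channel-to-two-channel trick: sigmoid gives the
# foreground probability, its complement is the background probability,
# and argmax over the channel axis yields the predicted label map.
import numpy as np


def sigmoid_to_softmax_np(logit_nchw):
    """(N, 1, H, W) raw logits -> (N, 2, H, W) per-class probabilities."""
    prob_fg = 1.0 / (1.0 + np.exp(-logit_nchw))  # sigmoid
    prob_bg = 1.0 - prob_fg
    return np.concatenate([prob_bg, prob_fg], axis=1)


logit = np.random.randn(1, 1, 4, 4).astype(np.float32)  # hypothetical network output
probs = sigmoid_to_softmax_np(logit)
pred = np.argmax(probs, axis=1)  # (N, H, W) label map, values in {0, 1}
assert np.allclose(probs.sum(axis=1), 1.0)  # the two channels form a distribution
```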
- -import os -import os.path as osp -import sys -import numpy as np -from PIL import Image as Image -import argparse -from models import load_model -from models.utils.visualize import get_color_map_list -from utils import paddle_utils - - -def parse_args(): - parser = argparse.ArgumentParser(description='RemoteSensing predict') - parser.add_argument( - '--single_img', - dest='single_img', - help='single image path to predict', - default=None, - type=str) - parser.add_argument( - '--data_dir', - dest='data_dir', - help='dataset directory', - default=None, - type=str) - parser.add_argument( - '--file_list', - dest='file_list', - help='file name of predict file list', - default=None, - type=str) - parser.add_argument( - '--load_model_dir', - dest='load_model_dir', - help='model load directory', - default=None, - type=str) - parser.add_argument( - '--save_img_dir', - dest='save_img_dir', - help='save directory name of predict results', - default='predict_results', - type=str) - parser.add_argument( - '--color_map', - dest='color_map', - help='color map of predict results', - type=int, - nargs='*', - default=-1) - if len(sys.argv) < 2: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -paddle_utils.enable_static() -args = parse_args() -data_dir = args.data_dir -file_list = args.file_list -single_img = args.single_img -load_model_dir = args.load_model_dir -save_img_dir = args.save_img_dir -if not osp.exists(save_img_dir): - os.makedirs(save_img_dir) -if args.color_map == -1: - color_map = get_color_map_list(256) -else: - color_map = args.color_map - -# predict -model = load_model(load_model_dir) - -if single_img is not None: - pred = model.predict(single_img) - # 以伪彩色png图片保存预测结果 - pred_name, _ = osp.splitext(osp.basename(single_img)) - pred_path = osp.join(save_img_dir, pred_name + '.png') - pred_mask = Image.fromarray(pred['label_map'].astype(np.uint8), mode='P') - pred_mask.putpalette(color_map) - pred_mask.save(pred_path) - print('Predict result is saved in {}'.format(pred_path)) -elif (file_list is not None) and (data_dir is not None): - with open(osp.join(data_dir, file_list)) as f: - lines = f.readlines() - for line in lines: - img_path = line.split(' ')[0] - img_path_ = osp.join(data_dir, img_path) - - pred = model.predict(img_path_) - - # 以伪彩色png图片保存预测结果 - pred_name, _ = osp.splitext(osp.basename(img_path)) - pred_path = osp.join(save_img_dir, pred_name + '.png') - pred_mask = Image.fromarray( - pred['label_map'].astype(np.uint8), mode='P') - pred_mask.putpalette(color_map) - pred_mask.save(pred_path) - print('Predict result is saved in {}'.format(pred_path)) -else: - raise Exception( - 'You should either set the parameter single_img, or set the parameters data_dir and file_list.' - ) diff --git a/legacy/contrib/RemoteSensing/readers/__init__.py b/legacy/contrib/RemoteSensing/readers/__init__.py deleted file mode 100644 index 642d80b146..0000000000 --- a/legacy/contrib/RemoteSensing/readers/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from .reader import Reader diff --git a/legacy/contrib/RemoteSensing/readers/base.py b/legacy/contrib/RemoteSensing/readers/base.py deleted file mode 100644 index 8e73adbc78..0000000000 --- a/legacy/contrib/RemoteSensing/readers/base.py +++ /dev/null @@ -1,250 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from threading import Thread -import multiprocessing -import collections -import numpy as np -import six -import sys -import copy -import random -import platform -import chardet -from utils import logging - - -class EndSignal(): - pass - - -def is_pic(img_name): - valid_suffix = ['JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png'] - suffix = img_name.split('.')[-1] - if suffix not in valid_suffix: - return False - return True - - -def is_valid(sample): - if sample is None: - return False - if isinstance(sample, tuple): - for s in sample: - if s is None: - return False - elif isinstance(s, np.ndarray) and s.size == 0: - return False - elif isinstance(s, collections.Sequence) and len(s) == 0: - return False - return True - - -def get_encoding(path): - f = open(path, 'rb') - data = f.read() - file_encoding = chardet.detect(data).get('encoding') - return file_encoding - - -def multithread_reader(mapper, - reader, - num_workers=4, - buffer_size=1024, - batch_size=8, - drop_last=True): - from queue import Queue - end = EndSignal() - - # define a worker to read samples from reader to in_queue - def read_worker(reader, in_queue): - for i in reader(): - in_queue.put(i) - in_queue.put(end) - - # define a worker to handle samples from in_queue by mapper - # and put mapped samples into out_queue - def handle_worker(in_queue, out_queue, mapper): - sample = in_queue.get() - while not isinstance(sample, EndSignal): - if len(sample) == 2: - r = mapper(sample[0], sample[1]) - elif len(sample) == 3: - r = mapper(sample[0], sample[1], sample[2]) - else: - raise Exception('The sample\'s length must be 2 or 3.') - if is_valid(r): - out_queue.put(r) - sample = in_queue.get() - in_queue.put(end) - out_queue.put(end) - - def xreader(): - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - # start a read worker in a thread - target = read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # start several handle_workers - target = handle_worker - args = (in_queue, out_queue, mapper) - workers = [] - for i in range(num_workers): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - - batch_data = [] - sample = out_queue.get() - while not isinstance(sample, EndSignal): - batch_data.append(sample) - if len(batch_data) == batch_size: - batch_data = GenerateMiniBatch(batch_data) - yield batch_data - batch_data = [] - sample = out_queue.get() - finish = 1 - while finish < num_workers: - sample = 
out_queue.get() - if isinstance(sample, EndSignal): - finish += 1 - else: - batch_data.append(sample) - if len(batch_data) == batch_size: - batch_data = GenerateMiniBatch(batch_data) - yield batch_data - batch_data = [] - if not drop_last and len(batch_data) != 0: - batch_data = GenerateMiniBatch(batch_data) - yield batch_data - batch_data = [] - - return xreader - - -def multiprocess_reader(mapper, - reader, - num_workers=4, - buffer_size=1024, - batch_size=8, - drop_last=True): - from .shared_queue import SharedQueue as Queue - - def _read_into_queue(samples, mapper, queue): - end = EndSignal() - try: - for sample in samples: - if sample is None: - raise ValueError("sample has None") - if len(sample) == 2: - result = mapper(sample[0], sample[1]) - elif len(sample) == 3: - result = mapper(sample[0], sample[1], sample[2]) - else: - raise Exception('The sample\'s length must be 2 or 3.') - if is_valid(result): - queue.put(result) - queue.put(end) - except: - queue.put("") - six.reraise(*sys.exc_info()) - - def queue_reader(): - queue = Queue(buffer_size, memsize=3 * 1024**3) - total_samples = [[] for i in range(num_workers)] - for i, sample in enumerate(reader()): - index = i % num_workers - total_samples[index].append(sample) - for i in range(num_workers): - p = multiprocessing.Process( - target=_read_into_queue, args=(total_samples[i], mapper, queue)) - p.start() - - finish_num = 0 - batch_data = list() - while finish_num < num_workers: - sample = queue.get() - if isinstance(sample, EndSignal): - finish_num += 1 - elif sample == "": - raise ValueError("multiprocess reader raises an exception") - else: - batch_data.append(sample) - if len(batch_data) == batch_size: - batch_data = GenerateMiniBatch(batch_data) - yield batch_data - batch_data = [] - if len(batch_data) != 0 and not drop_last: - batch_data = GenerateMiniBatch(batch_data) - yield batch_data - batch_data = [] - - return queue_reader - - -def GenerateMiniBatch(batch_data): - if len(batch_data) == 1: - return batch_data - width = [data[0].shape[2] for data in batch_data] - height = [data[0].shape[1] for data in batch_data] - if len(set(width)) == 1 and len(set(height)) == 1: - return batch_data - max_shape = np.array([data[0].shape for data in batch_data]).max(axis=0) - padding_batch = [] - for data in batch_data: - im_c, im_h, im_w = data[0].shape[:] - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), - dtype=np.float32) - padding_im[:, :im_h, :im_w] = data[0] - padding_batch.append((padding_im, ) + data[1:]) - return padding_batch - - -class BaseReader: - def __init__(self, - transforms=None, - num_workers=4, - buffer_size=100, - parallel_method='thread', - shuffle=False): - if transforms is None: - raise Exception("transform should be defined.") - self.transforms = transforms - self.num_workers = num_workers - self.buffer_size = buffer_size - self.parallel_method = parallel_method - self.shuffle = shuffle - - def generator(self, batch_size=1, drop_last=True): - self.batch_size = batch_size - parallel_reader = multithread_reader - if self.parallel_method == "process": - if platform.platform().startswith("Windows"): - logging.debug( - "multiprocess_reader is not supported in Windows platform, force to use multithread_reader." 
- ) - else: - parallel_reader = multiprocess_reader - return parallel_reader( - self.transforms, - self.iterator, - num_workers=self.num_workers, - buffer_size=self.buffer_size, - batch_size=batch_size, - drop_last=drop_last) diff --git a/legacy/contrib/RemoteSensing/readers/reader.py b/legacy/contrib/RemoteSensing/readers/reader.py deleted file mode 100644 index e06d4e92f9..0000000000 --- a/legacy/contrib/RemoteSensing/readers/reader.py +++ /dev/null @@ -1,111 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from __future__ import absolute_import -import os.path as osp -import random -import imghdr -import gdal -import numpy as np -from utils import logging -from .base import BaseReader -from .base import get_encoding -from collections import OrderedDict -from PIL import Image - - -def read_img(img_path): - img_format = imghdr.what(img_path) - name, ext = osp.splitext(img_path) - if img_format == 'tiff' or ext == '.img': - dataset = gdal.Open(img_path) - if dataset == None: - raise Exception('Can not open', img_path) - im_data = dataset.ReadAsArray() - return im_data.transpose((1, 2, 0)) - elif img_format == 'png': - return np.asarray(Image.open(img_path)) - elif ext == '.npy': - return np.load(img_path) - else: - raise Exception('Not support {} image format!'.format(ext)) - - -class Reader(BaseReader): - """读取数据集,并对样本进行相应的处理。 - - Args: - data_dir (str): 数据集所在的目录路径。 - file_list (str): 描述数据集图片文件和对应标注文件的文件路径(文本内每行路径为相对data_dir的相对路径)。 - label_list (str): 描述数据集包含的类别信息文件路径。 - transforms (list): 数据集中每个样本的预处理/增强算子。 - num_workers (int): 数据集中样本在预处理过程中的线程或进程数。默认为4。 - buffer_size (int): 数据集中样本在预处理过程中队列的缓存长度,以样本数为单位。默认为100。 - parallel_method (str): 数据集中样本在预处理过程中并行处理的方式,支持'thread' - 线程和'process'进程两种方式。默认为'thread'。 - shuffle (bool): 是否需要对数据集中样本打乱顺序。默认为False。 - """ - - def __init__(self, - data_dir, - file_list, - label_list, - transforms=None, - num_workers=4, - buffer_size=100, - parallel_method='thread', - shuffle=False): - super(Reader, self).__init__( - transforms=transforms, - num_workers=num_workers, - buffer_size=buffer_size, - parallel_method=parallel_method, - shuffle=shuffle) - self.file_list = OrderedDict() - self.labels = list() - self._epoch = 0 - - with open(label_list, encoding=get_encoding(label_list)) as f: - for line in f: - item = line.strip() - self.labels.append(item) - - with open(file_list, encoding=get_encoding(file_list)) as f: - for line in f: - items = line.strip().split() - full_path_im = osp.join(data_dir, items[0]) - full_path_label = osp.join(data_dir, items[1]) - if not osp.exists(full_path_im): - raise IOError( - 'The image file {} is not exist!'.format(full_path_im)) - if not osp.exists(full_path_label): - raise IOError('The image file {} is not exist!'.format( - full_path_label)) - self.file_list[full_path_im] = full_path_label - self.num_samples = len(self.file_list) - logging.info("{} samples in file {}".format( - len(self.file_list), file_list)) - - def iterator(self): - 
self._epoch += 1 - self._pos = 0 - files = list(self.file_list.keys()) - if self.shuffle: - random.shuffle(files) - files = files[:self.num_samples] - self.num_samples = len(files) - for f in files: - label_path = self.file_list[f] - sample = [f, None, label_path] - yield sample diff --git a/legacy/contrib/RemoteSensing/requirements.txt b/legacy/contrib/RemoteSensing/requirements.txt deleted file mode 100644 index f98fb9bc51..0000000000 --- a/legacy/contrib/RemoteSensing/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -visualdl >= 2.0.0b diff --git a/legacy/contrib/RemoteSensing/tools/cal_norm_coef.py b/legacy/contrib/RemoteSensing/tools/cal_norm_coef.py deleted file mode 100644 index a5fc03e062..0000000000 --- a/legacy/contrib/RemoteSensing/tools/cal_norm_coef.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import os -import os.path as osp -import sys -import argparse -from tqdm import tqdm -import pickle -from data_analyse_and_check import read_img - - -def parse_args(): - parser = argparse.ArgumentParser( - description= - 'Compute normalization coefficient and clip percentage before training.' 
- ) - parser.add_argument( - '--data_dir', - dest='data_dir', - help='Dataset directory', - default=None, - type=str) - parser.add_argument( - '--pkl_path', - dest='pkl_path', - help='Path of img_pixel_statistics.pkl', - default=None, - type=str) - parser.add_argument( - '--clip_min_value', - dest='clip_min_value', - help='Min values for clipping data', - nargs='+', - default=None, - type=int) - parser.add_argument( - '--clip_max_value', - dest='clip_max_value', - help='Max values for clipping data', - nargs='+', - default=None, - type=int) - parser.add_argument( - '--separator', - dest='separator', - help='file list separator', - default=" ", - type=str) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def compute_single_img(img, clip_min_value, clip_max_value): - channel = img.shape[2] - means = np.zeros(channel) - stds = np.zeros(channel) - for k in range(channel): - if clip_max_value != [] and clip_min_value != []: - np.clip( - img[:, :, k], - clip_min_value[k], - clip_max_value[k], - out=img[:, :, k]) - - # Rescaling (min-max normalization) - range_value = [ - clip_max_value[i] - clip_min_value[i] - for i in range(len(clip_max_value)) - ] - img_k = (img[:, :, k].astype(np.float32, copy=False) - - clip_min_value[k]) / range_value[k] - else: - img_k = img[:, :, k] - - # count mean, std - means[k] = np.mean(img_k) - stds[k] = np.std(img_k) - return means, stds - - -def cal_normalize_coefficient(data_dir, separator, clip_min_value, - clip_max_value): - train_file_list = osp.join(data_dir, 'train.txt') - val_file_list = osp.join(data_dir, 'val.txt') - test_file_list = osp.join(data_dir, 'test.txt') - total_img_num = 0 - for file_list in [train_file_list, val_file_list, test_file_list]: - with open(file_list, 'r') as fid: - print("\n-----------------------------\nCheck {}...".format( - file_list)) - lines = fid.readlines() - if not lines: - print("File list is empty!") - continue - for line in tqdm(lines): - line = line.strip() - parts = line.split(separator) - img_name, grt_name = parts[0], parts[1] - img_path = os.path.join(data_dir, img_name) - img = read_img(img_path) - if total_img_num == 0: - channel = img.shape[2] - total_means = np.zeros(channel) - total_stds = np.zeros(channel) - means, stds = compute_single_img(img, clip_min_value, - clip_max_value) - total_means += means - total_stds += stds - total_img_num += 1 - - # count mean, std - total_means = total_means / total_img_num - total_stds = total_stds / total_img_num - print("\nCount the channel-by-channel mean and std of the image:\n" - "mean = {}\nstd = {}".format(total_means, total_stds)) - - -def cal_clip_percentage(pkl_path, clip_min_value, clip_max_value): - """ - Calculate the percentage of pixels to be clipped - """ - with open(pkl_path, 'rb') as f: - percentage, img_value_num = pickle.load(f) - - for k in range(len(img_value_num)): - range_pixel = 0 - for i, element in enumerate(img_value_num[k]): - if clip_min_value[k] <= i <= clip_max_value[k]: - range_pixel += element - sum_pixel = sum(img_value_num[k]) - print('channel {}, the percentage of pixels to be clipped = {}'.format( - k, 1 - range_pixel / sum_pixel)) - - -def main(): - args = parse_args() - data_dir = args.data_dir - separator = args.separator - clip_min_value = args.clip_min_value - clip_max_value = args.clip_max_value - pkl_path = args.pkl_path - - cal_normalize_coefficient(data_dir, separator, clip_min_value, - clip_max_value) - cal_clip_percentage(pkl_path, clip_min_value, clip_max_value) - - -if __name__ == 
"__main__": - main() diff --git a/legacy/contrib/RemoteSensing/tools/create_dataset_list.py b/legacy/contrib/RemoteSensing/tools/create_dataset_list.py deleted file mode 100644 index c1b17372e1..0000000000 --- a/legacy/contrib/RemoteSensing/tools/create_dataset_list.py +++ /dev/null @@ -1,145 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os.path -import argparse -import warnings - - -def parse_args(): - parser = argparse.ArgumentParser( - description= - 'A tool for dividing dataset and generating file lists by file directory structure.' - ) - parser.add_argument('dataset_root', help='dataset root directory', type=str) - parser.add_argument( - '--separator', - dest='separator', - help='file list separator', - default=" ", - type=str) - parser.add_argument( - '--folder', - help='the folder names of images and labels', - type=str, - nargs=2, - default=['images', 'annotations']) - parser.add_argument( - '--second_folder', - help= - 'the second-level folder names of train set, validation set, test set', - type=str, - nargs='*', - default=['train', 'val', 'test']) - parser.add_argument( - '--format', - help='data format of images and labels, default tif, png.', - type=str, - nargs=2, - default=['tif', 'png']) - parser.add_argument( - '--label_class', - help='label class names', - type=str, - nargs='*', - default=['__background__', '__foreground__']) - parser.add_argument( - '--postfix', - help='postfix of images or labels', - type=str, - nargs=2, - default=['', '']) - - return parser.parse_args() - - -def get_files(image_or_label, dataset_split, args): - dataset_root = args.dataset_root - postfix = args.postfix - format = args.format - folder = args.folder - - pattern = '*%s.%s' % (postfix[image_or_label], format[image_or_label]) - - search_files = os.path.join(dataset_root, folder[image_or_label], - dataset_split, pattern) - search_files2 = os.path.join(dataset_root, folder[image_or_label], - dataset_split, "*", pattern) # 包含子目录 - search_files3 = os.path.join(dataset_root, folder[image_or_label], - dataset_split, "*", "*", pattern) # 包含三级目录 - - filenames = glob.glob(search_files) - filenames2 = glob.glob(search_files2) - filenames3 = glob.glob(search_files3) - - filenames = filenames + filenames2 + filenames3 - - return sorted(filenames) - - -def generate_list(args): - dataset_root = args.dataset_root - separator = args.separator - - file_list = os.path.join(dataset_root, 'labels.txt') - with open(file_list, "w") as f: - for label_class in args.label_class: - f.write(label_class + '\n') - - for dataset_split in args.second_folder: - print("Creating {}.txt...".format(dataset_split)) - image_files = get_files(0, dataset_split, args) - label_files = get_files(1, dataset_split, args) - if not image_files: - img_dir = os.path.join(dataset_root, args.folder[0], dataset_split) - warnings.warn("No images in {} !!!".format(img_dir)) - num_images = len(image_files) - - if not label_files: - 
label_dir = os.path.join(dataset_root, args.folder[1], - dataset_split) - warnings.warn("No labels in {} !!!".format(label_dir)) - num_label = len(label_files) - - if num_images != num_label and num_label > 0: - raise Exception( - "Number of images = {} number of labels = {} \n" - "Either number of images is equal to number of labels, " - "or number of labels is equal to 0.\n" - "Please check your dataset!".format(num_images, num_label)) - - file_list = os.path.join(dataset_root, dataset_split + '.txt') - with open(file_list, "w") as f: - for item in range(num_images): - left = image_files[item].replace(dataset_root, '') - if left[0] == os.path.sep: - left = left.lstrip(os.path.sep) - - try: - right = label_files[item].replace(dataset_root, '') - if right[0] == os.path.sep: - right = right.lstrip(os.path.sep) - line = left + separator + right + '\n' - except: - line = left + '\n' - - f.write(line) - print(line) - - -if __name__ == '__main__': - args = parse_args() - generate_list(args) diff --git a/legacy/contrib/RemoteSensing/tools/data_analyse_and_check.py b/legacy/contrib/RemoteSensing/tools/data_analyse_and_check.py deleted file mode 100644 index b0c0d478e3..0000000000 --- a/legacy/contrib/RemoteSensing/tools/data_analyse_and_check.py +++ /dev/null @@ -1,507 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
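For reference, the list-generation logic removed in `create_dataset_list.py` above reduces to globbing matching image/label files and writing one `image<separator>label` line per sample into `train.txt`/`val.txt`/`test.txt`. A minimal sketch of that idea, assuming the tool's default `images`/`annotations` folder layout and `tif`/`png` formats (the helper name `write_file_list` is illustrative, not part of the deleted tool):

```python
import glob
import os.path as osp

def write_file_list(dataset_root, split, separator=" "):
    """Pair images with labels by sorted order and write '<image> <label>' lines."""
    images = sorted(glob.glob(osp.join(dataset_root, "images", split, "*.tif")))
    labels = sorted(glob.glob(osp.join(dataset_root, "annotations", split, "*.png")))
    with open(osp.join(dataset_root, split + ".txt"), "w") as f:
        for img, lab in zip(images, labels):
            # paths are written relative to dataset_root, as the file lists expect
            f.write(osp.relpath(img, dataset_root) + separator +
                    osp.relpath(lab, dataset_root) + "\n")
```

Unlike the full tool, this sketch does not search nested subdirectories or warn when image and label counts differ.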
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import os -import os.path as osp -import sys -import argparse -from PIL import Image -from tqdm import tqdm -import imghdr -import logging -import pickle -import gdal - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Data analyse and data check before training.') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='Dataset directory', - default=None, - type=str) - parser.add_argument( - '--num_classes', - dest='num_classes', - help='Number of classes', - default=None, - type=int) - parser.add_argument( - '--separator', - dest='separator', - help='file list separator', - default=" ", - type=str) - parser.add_argument( - '--ignore_index', - dest='ignore_index', - help='Ignored class index', - default=255, - type=int) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -def read_img(img_path): - img_format = imghdr.what(img_path) - name, ext = osp.splitext(img_path) - if img_format == 'tiff' or ext == '.img': - dataset = gdal.Open(img_path) - if dataset == None: - raise Exception('Can not open', img_path) - im_data = dataset.ReadAsArray() - return im_data.transpose((1, 2, 0)) - elif ext == '.npy': - return np.load(img_path) - else: - raise Exception('Not support {} image format!'.format(ext)) - - -def img_pixel_statistics(img, img_value_num, img_min_value, img_max_value): - channel = img.shape[2] - means = np.zeros(channel) - stds = np.zeros(channel) - for k in range(channel): - img_k = img[:, :, k] - - # count mean, std - means[k] = np.mean(img_k) - stds[k] = np.std(img_k) - - # count min, max - min_value = np.min(img_k) - max_value = np.max(img_k) - if img_max_value[k] < max_value: - img_max_value[k] = max_value - if img_min_value[k] > min_value: - img_min_value[k] = min_value - - # count the distribution of image value, value number - unique, counts = np.unique(img_k, return_counts=True) - add_num = [] - max_unique = np.max(unique) - add_len = max_unique - len(img_value_num[k]) + 1 - if add_len > 0: - img_value_num[k] += ([0] * add_len) - for i in range(len(unique)): - value = unique[i] - img_value_num[k][value] += counts[i] - - img_value_num[k] += add_num - return means, stds, img_min_value, img_max_value, img_value_num - - -def data_distribution_statistics(data_dir, img_value_num, logger): - """count the distribution of image value, value number - """ - logger.info( - "\n-----------------------------\nThe whole dataset statistics...") - - if not img_value_num: - return - logger.info("\nImage pixel statistics:") - total_ratio = [] - [total_ratio.append([]) for i in range(len(img_value_num))] - for k in range(len(img_value_num)): - total_num = sum(img_value_num[k]) - total_ratio[k] = [i / total_num for i in img_value_num[k]] - total_ratio[k] = np.around(total_ratio[k], decimals=4) - with open(os.path.join(data_dir, 'img_pixel_statistics.pkl'), 'wb') as f: - pickle.dump([total_ratio, img_value_num], f) - - -def data_range_statistics(img_min_value, img_max_value, logger): - """print min value, max value - """ - logger.info("value range: \nimg_min_value = {} \nimg_max_value = {}".format( - img_min_value, img_max_value)) - - -def cal_normalize_coefficient(total_means, total_stds, total_img_num, logger): - """count mean, std - """ - total_means = total_means / total_img_num - total_stds = total_stds / total_img_num - logger.info("\nCount the channel-by-channel mean and std of the 
image:\n" - "mean = {}\nstd = {}".format(total_means, total_stds)) - - -def error_print(str): - return "".join(["\nNOT PASS ", str]) - - -def correct_print(str): - return "".join(["\nPASS ", str]) - - -def pil_imread(file_path): - """read pseudo-color label""" - im = Image.open(file_path) - return np.asarray(im) - - -def get_img_shape_range(img, max_width, max_height, min_width, min_height): - """获取图片最大和最小宽高""" - img_shape = img.shape - height, width = img_shape[0], img_shape[1] - max_height = max(height, max_height) - max_width = max(width, max_width) - min_height = min(height, min_height) - min_width = min(width, min_width) - return max_width, max_height, min_width, min_height - - -def get_img_channel_num(img, img_channels): - """获取图像的通道数""" - img_shape = img.shape - if img_shape[-1] not in img_channels: - img_channels.append(img_shape[-1]) - return img_channels - - -def is_label_single_channel(label): - """判断标签是否为灰度图""" - label_shape = label.shape - if len(label_shape) == 2: - return True - else: - return False - - -def image_label_shape_check(img, label): - """ - 验证图像和标注的大小是否匹配 - """ - - flag = True - img_height = img.shape[0] - img_width = img.shape[1] - label_height = label.shape[0] - label_width = label.shape[1] - - if img_height != label_height or img_width != label_width: - flag = False - return flag - - -def ground_truth_check(label, label_path): - """ - 验证标注图像的格式 - 统计标注图类别和像素数 - params: - label: 标注图 - label_path: 标注图路径 - return: - png_format: 返回是否是png格式图片 - unique: 返回标注类别 - counts: 返回标注的像素数 - """ - if imghdr.what(label_path) == "png": - png_format = True - else: - png_format = False - - unique, counts = np.unique(label, return_counts=True) - - return png_format, unique, counts - - -def sum_label_check(label_classes, num_of_each_class, ignore_index, num_classes, - total_label_classes, total_num_of_each_class): - """ - 统计所有标注图上的类别和每个类别的像素数 - params: - label_classes: 标注类别 - num_of_each_class: 各个类别的像素数目 - """ - is_label_correct = True - - if ignore_index in label_classes: - label_classes2 = np.delete(label_classes, - np.where(label_classes == ignore_index)) - else: - label_classes2 = label_classes - if min(label_classes2) < 0 or max(label_classes2) > num_classes - 1: - is_label_correct = False - add_class = [] - add_num = [] - for i in range(len(label_classes)): - gi = label_classes[i] - if gi in total_label_classes: - j = total_label_classes.index(gi) - total_num_of_each_class[j] += num_of_each_class[i] - else: - add_class.append(gi) - add_num.append(num_of_each_class[i]) - total_num_of_each_class += add_num - total_label_classes += add_class - return is_label_correct, total_num_of_each_class, total_label_classes - - -def label_class_check(num_classes, total_label_classes, total_num_of_each_class, - wrong_labels, logger): - """ - 检查实际标注类别是否和配置参数`num_classes`,`ignore_index`匹配。 - - **NOTE:** - 标注图像类别数值必须在[0~(`num_classes`-1)]范围内或者为`ignore_index`。 - 标注类别最好从0开始,否则可能影响精度。 - """ - total_ratio = total_num_of_each_class / sum(total_num_of_each_class) - total_ratio = np.around(total_ratio, decimals=4) - total_nc = sorted( - zip(total_label_classes, total_ratio, total_num_of_each_class)) - if len(wrong_labels) == 0 and not total_nc[0][0]: - logger.info(correct_print("label class check!")) - else: - logger.info(error_print("label class check!")) - if total_nc[0][0]: - logger.info("Warning: label classes should start from 0") - if len(wrong_labels) > 0: - logger.info( - "fatal error: label class is out of range [0, {}]".format( - num_classes - 1)) - for i in wrong_labels: - logger.debug(i) - 
return total_nc - - -def label_class_statistics(total_nc, logger): - """ - 对标注图像进行校验,输出校验结果 - """ - logger.info( - "\nLabel class statistics:\n" - "(label class, percentage, total pixel number) = {} ".format(total_nc)) - - -def shape_check(shape_unequal_image, logger): - """输出shape校验结果""" - if len(shape_unequal_image) == 0: - logger.info(correct_print("shape check")) - logger.info("All images are the same shape as the labels") - else: - logger.info(error_print("shape check")) - logger.info( - "Some images are not the same shape as the labels as follow: ") - for i in shape_unequal_image: - logger.debug(i) - - -def separator_check(wrong_lines, file_list, separator, logger): - """检查分割符是否复合要求""" - if len(wrong_lines) == 0: - logger.info( - correct_print( - file_list.split(os.sep)[-1] + " DATASET.separator check")) - else: - logger.info( - error_print( - file_list.split(os.sep)[-1] + " DATASET.separator check")) - logger.info( - "The following list is not separated by {}".format(separator)) - for i in wrong_lines: - logger.debug(i) - - -def imread_check(imread_failed, logger): - if len(imread_failed) == 0: - logger.info(correct_print("dataset reading check")) - logger.info("All images can be read successfully") - else: - logger.info(error_print("dataset reading check")) - logger.info("Failed to read {} images".format(len(imread_failed))) - for i in imread_failed: - logger.debug(i) - - -def single_channel_label_check(label_not_single_channel, logger): - if len(label_not_single_channel) == 0: - logger.info(correct_print("label single_channel check")) - logger.info("All label images are single_channel") - else: - logger.info(error_print("label single_channel check")) - logger.info( - "{} label images are not single_channel\nLabel pixel statistics may be insignificant" - .format(len(label_not_single_channel))) - for i in label_not_single_channel: - logger.debug(i) - - -def img_shape_range_statistics(max_width, min_width, max_height, min_height, - logger): - logger.info("\nImage size statistics:") - logger.info( - "max width = {} min width = {} max height = {} min height = {}". 
- format(max_width, min_width, max_height, min_height)) - - -def img_channels_statistics(img_channels, logger): - logger.info("\nImage channels statistics\nImage channels = {}".format( - np.unique(img_channels))) - - -def data_analyse_and_check(data_dir, num_classes, separator, ignore_index, - logger): - train_file_list = osp.join(data_dir, 'train.txt') - val_file_list = osp.join(data_dir, 'val.txt') - test_file_list = osp.join(data_dir, 'test.txt') - total_img_num = 0 - has_label = False - for file_list in [train_file_list, val_file_list, test_file_list]: - # initialization - imread_failed = [] - max_width = 0 - max_height = 0 - min_width = sys.float_info.max - min_height = sys.float_info.max - label_not_single_channel = [] - shape_unequal_image = [] - wrong_labels = [] - wrong_lines = [] - total_label_classes = [] - total_num_of_each_class = [] - img_channels = [] - - with open(file_list, 'r') as fid: - logger.info("\n-----------------------------\nCheck {}...".format( - file_list)) - lines = fid.readlines() - if not lines: - logger.info("File list is empty!") - continue - for line in tqdm(lines): - line = line.strip() - parts = line.split(separator) - if len(parts) == 1: - if file_list == train_file_list or file_list == val_file_list: - logger.info("Train or val list must have labels!") - break - img_name = parts - img_path = os.path.join(data_dir, img_name[0]) - try: - img = read_img(img_path) - except Exception as e: - imread_failed.append((line, str(e))) - continue - elif len(parts) == 2: - has_label = True - img_name, label_name = parts[0], parts[1] - img_path = os.path.join(data_dir, img_name) - label_path = os.path.join(data_dir, label_name) - try: - img = read_img(img_path) - label = pil_imread(label_path) - except Exception as e: - imread_failed.append((line, str(e))) - continue - - is_single_channel = is_label_single_channel(label) - if not is_single_channel: - label_not_single_channel.append(line) - continue - is_equal_img_label_shape = image_label_shape_check( - img, label) - if not is_equal_img_label_shape: - shape_unequal_image.append(line) - png_format, label_classes, num_of_each_class = ground_truth_check( - label, label_path) - is_label_correct, total_num_of_each_class, total_label_classes = sum_label_check( - label_classes, num_of_each_class, ignore_index, - num_classes, total_label_classes, - total_num_of_each_class) - if not is_label_correct: - wrong_labels.append(line) - else: - wrong_lines.append(lines) - continue - - if total_img_num == 0: - channel = img.shape[2] - total_means = np.zeros(channel) - total_stds = np.zeros(channel) - img_min_value = [sys.float_info.max] * channel - img_max_value = [0] * channel - img_value_num = [] - [img_value_num.append([]) for i in range(channel)] - means, stds, img_min_value, img_max_value, img_value_num = img_pixel_statistics( - img, img_value_num, img_min_value, img_max_value) - total_means += means - total_stds += stds - max_width, max_height, min_width, min_height = get_img_shape_range( - img, max_width, max_height, min_width, min_height) - img_channels = get_img_channel_num(img, img_channels) - total_img_num += 1 - - # data check - separator_check(wrong_lines, file_list, separator, logger) - imread_check(imread_failed, logger) - if has_label: - single_channel_label_check(label_not_single_channel, logger) - shape_check(shape_unequal_image, logger) - total_nc = label_class_check(num_classes, total_label_classes, - total_num_of_each_class, - wrong_labels, logger) - - # data analyse on train, validation, test set. 
- img_channels_statistics(img_channels, logger) - img_shape_range_statistics(max_width, min_width, max_height, - min_height, logger) - if has_label: - label_class_statistics(total_nc, logger) - # data analyse on the whole dataset. - data_range_statistics(img_min_value, img_max_value, logger) - data_distribution_statistics(data_dir, img_value_num, logger) - cal_normalize_coefficient(total_means, total_stds, total_img_num, logger) - - -def main(): - args = parse_args() - data_dir = args.data_dir - ignore_index = args.ignore_index - num_classes = args.num_classes - separator = args.separator - - logger = logging.getLogger() - logger.setLevel('DEBUG') - BASIC_FORMAT = "%(message)s" - formatter = logging.Formatter(BASIC_FORMAT) - sh = logging.StreamHandler() - sh.setFormatter(formatter) - sh.setLevel('INFO') - th = logging.FileHandler( - os.path.join(data_dir, 'data_analyse_and_check.log'), 'w') - th.setFormatter(formatter) - logger.addHandler(sh) - logger.addHandler(th) - - data_analyse_and_check(data_dir, num_classes, separator, ignore_index, - logger) - - print("\nDetailed error information can be viewed in {}.".format( - os.path.join(data_dir, 'data_analyse_and_check.log'))) - - -if __name__ == "__main__": - main() diff --git a/legacy/contrib/RemoteSensing/tools/data_distribution_vis.py b/legacy/contrib/RemoteSensing/tools/data_distribution_vis.py deleted file mode 100644 index d911c3234c..0000000000 --- a/legacy/contrib/RemoteSensing/tools/data_distribution_vis.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pickle -import sys -import argparse -import matplotlib.pyplot as plt - - -def parse_args(): - parser = argparse.ArgumentParser( - description='Visualize data distribution before training.') - parser.add_argument( - '--pkl_path', - dest='pkl_path', - help='Path of img_pixel_statistics.pkl', - default=None, - type=str) - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - path = args.pkl_path - with open(path, 'rb') as f: - percentage, img_value_num = pickle.load(f) - - for k in range(len(img_value_num)): - print('channel = {}'.format(k)) - plt.bar( - list(range(len(img_value_num[k]))), - img_value_num[k], - width=1, - log=True) - plt.xlabel('image value') - plt.ylabel('number') - plt.title('channel={}'.format(k)) - plt.show() diff --git a/legacy/contrib/RemoteSensing/tools/split_dataset_list.py b/legacy/contrib/RemoteSensing/tools/split_dataset_list.py deleted file mode 100644 index 9b122a9e2e..0000000000 --- a/legacy/contrib/RemoteSensing/tools/split_dataset_list.py +++ /dev/null @@ -1,151 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import glob -import os.path -import argparse -import warnings -import numpy as np - - -def parse_args(): - parser = argparse.ArgumentParser( - description= - 'A tool for proportionally randomizing dataset to produce file lists.') - parser.add_argument('dataset_root', help='the dataset root path', type=str) - parser.add_argument( - 'images_dir_name', help='the directory name of images', type=str) - parser.add_argument( - 'labels_dir_name', help='the directory name of labels', type=str) - parser.add_argument( - '--split', help='', nargs=3, type=float, default=[0.7, 0.3, 0]) - parser.add_argument( - '--label_class', - help='label class names', - type=str, - nargs='*', - default=['__background__', '__foreground__']) - parser.add_argument( - '--separator', - dest='separator', - help='file list separator', - default=" ", - type=str) - parser.add_argument( - '--format', - help='data format of images and labels, e.g. jpg, tif or png.', - type=str, - nargs=2, - default=['tif', 'png']) - parser.add_argument( - '--postfix', - help='postfix of images or labels', - type=str, - nargs=2, - default=['', '']) - - return parser.parse_args() - - -def get_files(path, format, postfix): - pattern = '*%s.%s' % (postfix, format) - - search_files = os.path.join(path, pattern) - search_files2 = os.path.join(path, "*", pattern) # 包含子目录 - search_files3 = os.path.join(path, "*", "*", pattern) # 包含三级目录 - - filenames = glob.glob(search_files) - filenames2 = glob.glob(search_files2) - filenames3 = glob.glob(search_files3) - - filenames = filenames + filenames2 + filenames3 - - return sorted(filenames) - - -def generate_list(args): - separator = args.separator - dataset_root = args.dataset_root - if sum(args.split) != 1.0: - raise ValueError("划分比例之和必须为1") - - file_list = os.path.join(dataset_root, 'labels.txt') - with open(file_list, "w") as f: - for label_class in args.label_class: - f.write(label_class + '\n') - - image_dir = os.path.join(dataset_root, args.images_dir_name) - label_dir = os.path.join(dataset_root, args.labels_dir_name) - image_files = get_files(image_dir, args.format[0], args.postfix[0]) - label_files = get_files(label_dir, args.format[1], args.postfix[1]) - if not image_files: - warnings.warn("No files in {}".format(image_dir)) - num_images = len(image_files) - - if not label_files: - warnings.warn("No files in {}".format(label_dir)) - num_label = len(label_files) - - if num_images != num_label and num_label > 0: - raise Exception("Number of images = {} number of labels = {} \n" - "Either number of images is equal to number of labels, " - "or number of labels is equal to 0.\n" - "Please check your dataset!".format( - num_images, num_label)) - - image_files = np.array(image_files) - label_files = np.array(label_files) - state = np.random.get_state() - np.random.shuffle(image_files) - np.random.set_state(state) - np.random.shuffle(label_files) - - start = 0 - num_split = len(args.split) - dataset_name = ['train', 'val', 'test'] - for i in range(num_split): - dataset_split = dataset_name[i] - print("Creating {}.txt...".format(dataset_split)) - if args.split[i] > 1.0 or args.split[i] < 0: - 
raise ValueError( - "{} dataset percentage should be 0~1.".format(dataset_split)) - - file_list = os.path.join(dataset_root, dataset_split + '.txt') - with open(file_list, "w") as f: - num = round(args.split[i] * num_images) - end = start + num - if i == num_split - 1: - end = num_images - for item in range(start, end): - left = image_files[item].replace(dataset_root, '') - if left[0] == os.path.sep: - left = left.lstrip(os.path.sep) - - try: - right = label_files[item].replace(dataset_root, '') - if right[0] == os.path.sep: - right = right.lstrip(os.path.sep) - line = left + separator + right + '\n' - except: - line = left + '\n' - - f.write(line) - print(line) - start = end - - -if __name__ == '__main__': - args = parse_args() - generate_list(args) diff --git a/legacy/contrib/RemoteSensing/train_demo.py b/legacy/contrib/RemoteSensing/train_demo.py deleted file mode 100644 index 91842138a5..0000000000 --- a/legacy/contrib/RemoteSensing/train_demo.py +++ /dev/null @@ -1,170 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os.path as osp -import argparse - -import transforms.transforms as T -from readers.reader import Reader -from models import UNet, HRNet -from utils import paddle_utils - - -def parse_args(): - parser = argparse.ArgumentParser(description='RemoteSensing training') - parser.add_argument( - '--model_type', - dest='model_type', - help="Model type for traing, which is one of ('unet', 'hrnet')", - type=str, - default='hrnet') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='dataset directory', - default=None, - type=str) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='model save directory', - default=None, - type=str) - parser.add_argument( - '--num_classes', - dest='num_classes', - help='Number of classes', - default=None, - type=int) - parser.add_argument( - '--channel', - dest='channel', - help='number of data channel', - default=3, - type=int) - parser.add_argument( - '--clip_min_value', - dest='clip_min_value', - help='Min values for clipping data', - nargs='+', - default=None, - type=int) - parser.add_argument( - '--clip_max_value', - dest='clip_max_value', - help='Max values for clipping data', - nargs='+', - default=None, - type=int) - parser.add_argument( - '--mean', - dest='mean', - help='Data means', - nargs='+', - default=None, - type=float) - parser.add_argument( - '--std', - dest='std', - help='Data standard deviation', - nargs='+', - default=None, - type=float) - parser.add_argument( - '--num_epochs', - dest='num_epochs', - help='number of traing epochs', - default=100, - type=int) - parser.add_argument( - '--train_batch_size', - dest='train_batch_size', - help='training batch size', - default=4, - type=int) - parser.add_argument( - '--lr', dest='lr', help='learning rate', default=0.01, type=float) - return parser.parse_args() - - -paddle_utils.enable_static() -args = parse_args() -data_dir = args.data_dir -save_dir = args.save_dir 
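The split tool removed just above keeps image/label pairs aligned by saving the NumPy RNG state and replaying the same permutation for both lists before slicing them by ratio. A minimal sketch of that paired-shuffle split (the function name `split_pairs` is illustrative):

```python
import numpy as np

def split_pairs(image_files, label_files, ratios=(0.7, 0.3, 0.0)):
    """Shuffle both lists with the same permutation, then slice them by ratio."""
    image_files, label_files = np.array(image_files), np.array(label_files)
    state = np.random.get_state()
    np.random.shuffle(image_files)
    np.random.set_state(state)      # replay the identical permutation
    np.random.shuffle(label_files)
    splits, start = [], 0
    for i, ratio in enumerate(ratios):
        # the last split absorbs any rounding remainder, as in the tool above
        end = len(image_files) if i == len(ratios) - 1 else start + round(ratio * len(image_files))
        splits.append((image_files[start:end], label_files[start:end]))
        start = end
    return splits  # [(train_imgs, train_labels), (val_imgs, val_labels), (test_imgs, test_labels)]
```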
-num_classes = args.num_classes -channel = args.channel -clip_min_value = args.clip_min_value -clip_max_value = args.clip_max_value -mean = args.mean -std = args.std -num_epochs = args.num_epochs -train_batch_size = args.train_batch_size -lr = args.lr - -# 定义训练和验证时的transforms -train_transforms = T.Compose([ - T.RandomVerticalFlip(0.5), - T.RandomHorizontalFlip(0.5), - T.ResizeStepScaling(0.5, 2.0, 0.25), - T.RandomPaddingCrop(1000), - T.Clip(min_val=clip_min_value, max_val=clip_max_value), - T.Normalize( - min_val=clip_min_value, max_val=clip_max_value, mean=mean, std=std), -]) - -eval_transforms = T.Compose([ - T.Clip(min_val=clip_min_value, max_val=clip_max_value), - T.Normalize( - min_val=clip_min_value, max_val=clip_max_value, mean=mean, std=std), -]) - -train_list = osp.join(data_dir, 'train.txt') -val_list = osp.join(data_dir, 'val.txt') -label_list = osp.join(data_dir, 'labels.txt') - -# 定义数据读取器 -train_reader = Reader( - data_dir=data_dir, - file_list=train_list, - label_list=label_list, - transforms=train_transforms, - shuffle=True) - -eval_reader = Reader( - data_dir=data_dir, - file_list=val_list, - label_list=label_list, - transforms=eval_transforms) - -if args.model_type == 'unet': - model = UNet(num_classes=num_classes, input_channel=channel) -elif args.model_type == 'hrnet': - model = HRNet(num_classes=num_classes, input_channel=channel) -else: - raise ValueError( - "--model_type: {} is set wrong, it shold be one of ('unet', " - "'hrnet')".format(args.model_type)) - -model.train( - num_epochs=num_epochs, - train_reader=train_reader, - train_batch_size=train_batch_size, - eval_reader=eval_reader, - eval_best_metric='miou', - save_interval_epochs=5, - log_interval_steps=10, - save_dir=save_dir, - learning_rate=lr, - use_vdl=True) diff --git a/legacy/contrib/RemoteSensing/transforms/__init__.py b/legacy/contrib/RemoteSensing/transforms/__init__.py deleted file mode 100644 index 2cc178c2f9..0000000000 --- a/legacy/contrib/RemoteSensing/transforms/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import transforms -from . import ops diff --git a/legacy/contrib/RemoteSensing/transforms/ops.py b/legacy/contrib/RemoteSensing/transforms/ops.py deleted file mode 100644 index 9abb18de10..0000000000 --- a/legacy/contrib/RemoteSensing/transforms/ops.py +++ /dev/null @@ -1,179 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import cv2 -import math -import numpy as np -from PIL import Image, ImageEnhance - - -def normalize(im, min_value, max_value, mean, std): - # Rescaling (min-max normalization) - range_value = [max_value[i] - min_value[i] for i in range(len(max_value))] - im = (im.astype(np.float32, copy=False) - min_value) / range_value - - # Standardization (Z-score Normalization) - im -= mean - im /= std - return im - - -def permute(im, to_bgr=False): - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - if to_bgr: - im = im[[2, 1, 0], :, :] - return im - - -def _resize(im, shape): - return cv2.resize(im, shape) - - -def resize_short(im, short_size=224): - percent = float(short_size) / min(im.shape[0], im.shape[1]) - resized_width = int(round(im.shape[1] * percent)) - resized_height = int(round(im.shape[0] * percent)) - im = _resize(im, shape=(resized_width, resized_height)) - return im - - -def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): - value = max(im.shape[0], im.shape[1]) - scale = float(long_size) / float(value) - im = cv2.resize(im, (0, 0), fx=scale, fy=scale, interpolation=interpolation) - return im - - -def random_crop(im, - crop_size=224, - lower_scale=0.08, - lower_ratio=3. / 4, - upper_ratio=4. / 3): - scale = [lower_scale, 1.0] - ratio = [lower_ratio, upper_ratio] - aspect_ratio = math.sqrt(np.random.uniform(*ratio)) - w = 1. * aspect_ratio - h = 1. / aspect_ratio - bound = min((float(im.shape[0]) / im.shape[1]) / (h**2), - (float(im.shape[1]) / im.shape[0]) / (w**2)) - scale_max = min(scale[1], bound) - scale_min = min(scale[0], bound) - target_area = im.shape[0] * im.shape[1] * np.random.uniform( - scale_min, scale_max) - target_size = math.sqrt(target_area) - w = int(target_size * w) - h = int(target_size * h) - i = np.random.randint(0, im.shape[0] - h + 1) - j = np.random.randint(0, im.shape[1] - w + 1) - im = im[i:i + h, j:j + w, :] - im = _resize(im, shape=(crop_size, crop_size)) - return im - - -def center_crop(im, crop_size=224): - height, width = im.shape[:2] - w_start = (width - crop_size) // 2 - h_start = (height - crop_size) // 2 - w_end = w_start + crop_size - h_end = h_start + crop_size - im = im[h_start:h_end, w_start:w_end, :] - return im - - -def horizontal_flip(im): - if len(im.shape) == 3: - im = im[:, ::-1, :] - elif len(im.shape) == 2: - im = im[:, ::-1] - return im - - -def vertical_flip(im): - if len(im.shape) == 3: - im = im[::-1, :, :] - elif len(im.shape) == 2: - im = im[::-1, :] - return im - - -def bgr2rgb(im): - return im[:, :, ::-1] - - -def brightness(im, brightness_lower, brightness_upper): - brightness_delta = np.random.uniform(brightness_lower, brightness_upper) - im = ImageEnhance.Brightness(im).enhance(brightness_delta) - return im - - -def contrast(im, contrast_lower, contrast_upper): - contrast_delta = np.random.uniform(contrast_lower, contrast_upper) - im = ImageEnhance.Contrast(im).enhance(contrast_delta) - return im - - -def saturation(im, saturation_lower, saturation_upper): - saturation_delta = np.random.uniform(saturation_lower, saturation_upper) - im = ImageEnhance.Color(im).enhance(saturation_delta) - return im - - -def hue(im, hue_lower, hue_upper): - hue_delta = np.random.uniform(hue_lower, hue_upper) - im = np.array(im.convert('HSV')) - im[:, :, 0] = im[:, :, 0] + hue_delta - im = Image.fromarray(im, mode='HSV').convert('RGB') - return im - - -def rotate(im, rotate_lower, rotate_upper): - rotate_delta = 
np.random.uniform(rotate_lower, rotate_upper) - im = im.rotate(int(rotate_delta)) - return im - - -def resize_padding(im, max_side_len=2400): - ''' - resize image to a size multiple of 32 which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - ''' - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # limit the max side - if max(resize_h, resize_w) > max_side_len: - ratio = float( - max_side_len) / resize_h if resize_h > resize_w else float( - max_side_len) / resize_w - else: - ratio = 1. - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - resize_h = resize_h if resize_h % 32 == 0 else (resize_h // 32 - 1) * 32 - resize_w = resize_w if resize_w % 32 == 0 else (resize_w // 32 - 1) * 32 - resize_h = max(32, resize_h) - resize_w = max(32, resize_w) - im = cv2.resize(im, (int(resize_w), int(resize_h))) - #im = cv2.resize(im, (512, 512)) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - _ratio = np.array([ratio_h, ratio_w]).reshape(-1, 2) - return im, _ratio diff --git a/legacy/contrib/RemoteSensing/transforms/transforms.py b/legacy/contrib/RemoteSensing/transforms/transforms.py deleted file mode 100644 index fcddf4ad5b..0000000000 --- a/legacy/contrib/RemoteSensing/transforms/transforms.py +++ /dev/null @@ -1,810 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
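The `normalize` helper removed above (and the `Normalize` transform that calls it) applies a two-step scheme: per-channel min-max rescaling to [0, 1] followed by z-score standardization with the dataset mean and std. A minimal NumPy sketch of the same computation:

```python
import numpy as np

def normalize(im, min_val, max_val, mean, std):
    """Min-max rescale each channel of an HWC image to [0, 1], then standardize."""
    im = im.astype(np.float32, copy=False)
    min_val = np.asarray(min_val, dtype=np.float32)
    max_val = np.asarray(max_val, dtype=np.float32)
    im = (im - min_val) / (max_val - min_val)   # broadcast over the channel axis
    im -= np.asarray(mean, dtype=np.float32)
    im /= np.asarray(std, dtype=np.float32)
    return im

# e.g. an 8-bit RGB image with the Normalize transform's default parameters:
# normalize(img, [0, 0, 0], [255.0, 255.0, 255.0], [0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
```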
- -from .ops import * -import random -import numpy as np -import cv2 -from collections import OrderedDict -from readers.reader import read_img - - -class Compose: - """根据数据预处理/增强算子对输入数据进行操作。 - 所有操作的输入图像流形状均是[H, W, C],其中H为图像高,W为图像宽,C为图像通道数。 - - Args: - transforms (list): 数据预处理/增强算子。 - - Raises: - TypeError: transforms不是list对象 - ValueError: transforms元素个数小于1。 - - """ - - def __init__(self, transforms): - if not isinstance(transforms, list): - raise TypeError('The transforms must be a list!') - if len(transforms) < 1: - raise ValueError('The length of transforms ' + \ - 'must be equal or larger than 1!') - self.transforms = transforms - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (str/np.ndarray): 图像路径/图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息,dict中的字段如下: - - shape_before_resize (tuple): 图像resize之前的大小(h, w)。 - - shape_before_padding (tuple): 图像padding之前的大小(h, w)。 - label (str/np.ndarray): 标注图像路径/标注图像np.ndarray数据。 - - Returns: - tuple: 根据网络所需字段所组成的tuple;字段由transforms中的最后一个数据预处理操作决定。 - """ - - if im_info is None: - im_info = dict() - im = read_img(im) - if im is None: - raise ValueError('Can\'t read The image file {}!'.format(im)) - if label is not None: - label = read_img(label) - - for op in self.transforms: - outputs = op(im, im_info, label) - im = outputs[0] - if len(outputs) >= 2: - im_info = outputs[1] - if len(outputs) == 3: - label = outputs[2] - return outputs - - -class RandomHorizontalFlip: - """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机水平翻转的概率。默认值为0.5。 - - """ - - def __init__(self, prob=0.5): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if random.random() < self.prob: - im = horizontal_flip(im) - if label is not None: - label = horizontal_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomVerticalFlip: - """以一定的概率对图像进行垂直翻转。当存在标注图像时,则同步进行翻转。 - - Args: - prob (float): 随机垂直翻转的概率。默认值为0.1。 - """ - - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if random.random() < self.prob: - im = vertical_flip(im) - if label is not None: - label = vertical_flip(label) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Resize: - """调整图像大小(resize),当存在标注图像时,则同步进行处理。 - - - 当目标大小(target_size)类型为int时,根据插值方式, - 将图像resize为[target_size, target_size]。 - - 当目标大小(target_size)类型为list或tuple时,根据插值方式, - 将图像resize为target_size, target_size的输入应为[w, h]或(w, h)。 - - Args: - target_size (int/list/tuple): 目标大小 - interp (str): resize的插值方式,与opencv的插值方式对应, - 可选的值为['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4'],默认为"LINEAR"。 - - Raises: - TypeError: target_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - AssertionError: interp的取值不在['NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4']之内 - """ - - # The interpolation mode - interp_dict = { - 'NEAREST': 
cv2.INTER_NEAREST, - 'LINEAR': cv2.INTER_LINEAR, - 'CUBIC': cv2.INTER_CUBIC, - 'AREA': cv2.INTER_AREA, - 'LANCZOS4': cv2.INTER_LANCZOS4 - } - - def __init__(self, target_size, interp='LINEAR'): - self.interp = interp - assert interp in self.interp_dict, "interp should be one of {}".format( - self.interp_dict.keys()) - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise ValueError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - - self.target_size = target_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info跟新字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - - Raises: - ZeroDivisionError: im的短边为0。 - TypeError: im不是np.ndarray数据。 - ValueError: im不是3维nd.ndarray。 - """ - if im_info is None: - im_info = OrderedDict() - im_info['shape_before_resize'] = im.shape[:2] - - if not isinstance(im, np.ndarray): - raise TypeError("ResizeImage: image type is not np.ndarray.") - if len(im.shape) != 3: - raise ValueError('ResizeImage: image is not 3-dimensional.') - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - if float(im_size_min) == 0: - raise ZeroDivisionError('ResizeImage: min size of image is 0') - - if isinstance(self.target_size, int): - resize_w = self.target_size - resize_h = self.target_size - else: - resize_w = self.target_size[0] - resize_h = self.target_size[1] - im_scale_x = float(resize_w) / float(im_shape[1]) - im_scale_y = float(resize_h) / float(im_shape[0]) - - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp_dict[self.interp]) - if label is not None: - label = cv2.resize( - label, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp_dict['NEAREST']) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeByLong: - """对图像长边resize到固定值,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - long_size (int): resize后图像的长边大小。 - """ - - def __init__(self, long_size): - self.long_size = long_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_resize (tuple): 保存resize之前图像的形状(h, w)。 - """ - if im_info is None: - im_info = OrderedDict() - - im_info['shape_before_resize'] = im.shape[:2] - im = resize_long(im, self.long_size) - if label is not None: - label = resize_long(label, self.long_size, cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeRangeScaling: - """对图像长边随机resize到指定范围内,短边按比例进行缩放。当存在标注图像时,则同步进行处理。 - - Args: - min_value (int): 图像长边resize后的最小值。默认值400。 - max_value (int): 图像长边resize后的最大值。默认值600。 - - Raises: - ValueError: 
min_value大于max_value - """ - - def __init__(self, min_value=400, max_value=600): - if min_value > max_value: - raise ValueError('min_value must be less than max_value, ' - 'but they are {} and {}.'.format( - min_value, max_value)) - self.min_value = min_value - self.max_value = max_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_value == self.max_value: - random_size = self.max_value - else: - random_size = int( - np.random.uniform(self.min_value, self.max_value) + 0.5) - value = max(im.shape[0], im.shape[1]) - scale = float(random_size) / float(value) - im = cv2.resize( - im, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR) - if label is not None: - label = cv2.resize( - label, (0, 0), - fx=scale, - fy=scale, - interpolation=cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ResizeStepScaling: - """对图像按照某一个比例resize,这个比例以scale_step_size为步长 - 在[min_scale_factor, max_scale_factor]随机变动。当存在标注图像时,则同步进行处理。 - - Args: - min_scale_factor(float), resize最小尺度。默认值0.75。 - max_scale_factor (float), resize最大尺度。默认值1.25。 - scale_step_size (float), resize尺度范围间隔。默认值0.25。 - - Raises: - ValueError: min_scale_factor大于max_scale_factor - """ - - def __init__(self, - min_scale_factor=0.75, - max_scale_factor=1.25, - scale_step_size=0.25): - if min_scale_factor > max_scale_factor: - raise ValueError( - 'min_scale_factor must be less than max_scale_factor, ' - 'but they are {} and {}.'.format(min_scale_factor, - max_scale_factor)) - self.min_scale_factor = min_scale_factor - self.max_scale_factor = max_scale_factor - self.scale_step_size = scale_step_size - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_scale_factor == self.max_scale_factor: - scale_factor = self.min_scale_factor - - elif self.scale_step_size == 0: - scale_factor = np.random.uniform(self.min_scale_factor, - self.max_scale_factor) - - else: - num_steps = int((self.max_scale_factor - self.min_scale_factor) / - self.scale_step_size + 1) - scale_factors = np.linspace(self.min_scale_factor, - self.max_scale_factor, - num_steps).tolist() - np.random.shuffle(scale_factors) - scale_factor = scale_factors[0] - - im = cv2.resize( - im, (0, 0), - fx=scale_factor, - fy=scale_factor, - interpolation=cv2.INTER_LINEAR) - if label is not None: - label = cv2.resize( - label, (0, 0), - fx=scale_factor, - fy=scale_factor, - interpolation=cv2.INTER_NEAREST) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Clip: - """ - 对图像上超出一定范围的数据进行裁剪。 - - Args: - min_val (list): 裁剪的下限,小于min_val的数值均设为min_val. 默认值[0, 0, 0]. - max_val (list): 裁剪的上限,大于max_val的数值均设为max_val. 
默认值[255.0, 255.0, 255.0] - """ - - def __init__(self, min_val=[0, 0, 0], max_val=[255.0, 255.0, 255.0]): - self.min_val = min_val - self.max_val = max_val - - def __call__(self, im, im_info=None, label=None): - if isinstance(self.min_val, list) and isinstance(self.max_val, list): - for k in range(im.shape[2]): - np.clip( - im[:, :, k], - self.min_val[k], - self.max_val[k], - out=im[:, :, k]) - else: - raise TypeError('min_val and max_val must be list') - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Normalize: - """对图像进行标准化。 - 1.图像像素归一化到区间 [0.0, 1.0]。 - 2.对图像进行减均值除以标准差操作。 - - Args: - min_val (list): 图像数据集的最小值。默认值[0, 0, 0]. - max_val (list): 图像数据集的最大值。默认值[255.0, 255.0, 255.0] - mean (list): 图像数据集的均值。默认值[0.5, 0.5, 0.5]. - std (list): 图像数据集的标准差。默认值[0.5, 0.5, 0.5]. - - Raises: - ValueError: mean或std不是list对象。std包含0。 - """ - - def __init__(self, - min_val=[0, 0, 0], - max_val=[255.0, 255.0, 255.0], - mean=[0.5, 0.5, 0.5], - std=[0.5, 0.5, 0.5]): - self.min_val = min_val - self.max_val = max_val - self.mean = mean - self.std = std - if not (isinstance(self.mean, list) and isinstance(self.std, list)): - raise ValueError("{}: input type is invalid.".format(self)) - from functools import reduce - if reduce(lambda x, y: x * y, self.std) == 0: - raise ValueError('{}: std is invalid!'.format(self)) - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - - im = normalize(im, self.min_val, self.max_val, mean, std) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class Padding: - """对图像或标注图像进行padding,padding方向为右和下。 - 根据提供的值对图像或标注图像进行padding操作。 - - Args: - target_size (int/list/tuple): padding后图像的大小。 - im_padding_value (list): 图像padding的值。默认为127.5。 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: target_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - """ - - def __init__(self, - target_size, - im_padding_value=127.5, - label_padding_value=255): - if isinstance(target_size, list) or isinstance(target_size, tuple): - if len(target_size) != 2: - raise ValueError( - 'when target is list or tuple, it should include 2 elements, but it is {}' - .format(target_size)) - elif not isinstance(target_size, int): - raise TypeError( - "Type of target_size is invalid. 
Must be Integer or List or tuple, now is {}" - .format(type(target_size))) - self.target_size = target_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - 其中,im_info新增字段为: - -shape_before_padding (tuple): 保存padding之前图像的形状(h, w)。 - - Raises: - ValueError: 输入图像im或label的形状大于目标值 - """ - if im_info is None: - im_info = OrderedDict() - im_info['shape_before_padding'] = im.shape[:2] - - im_height, im_width = im.shape[0], im.shape[1] - if isinstance(self.target_size, int): - target_height = self.target_size - target_width = self.target_size - else: - target_height = self.target_size[1] - target_width = self.target_size[0] - pad_height = target_height - im_height - pad_width = target_width - im_width - if pad_height < 0 or pad_width < 0: - raise ValueError( - 'the size of image should be less than target_size, but the size of image ({}, {}), is larger than target_size ({}, {})' - .format(im_width, im_height, target_width, target_height)) - else: - im = np.pad( - im, - pad_width=((0, pad_height), (0, pad_width), (0, 0)), - mode='constant', - constant_values=(self.im_padding_value, self.im_padding_value)) - if label is not None: - label = np.pad( - label, - pad_width=((0, pad_height), (0, pad_width)), - mode='constant', - constant_values=(self.label_padding_value, - self.label_padding_value)) - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomPaddingCrop: - """对图像和标注图进行随机裁剪,当所需要的裁剪尺寸大于原图时,则进行padding操作。 - - Args: - crop_size(int or list or tuple): 裁剪图像大小。默认为512。 - im_padding_value (list): 图像padding的值。默认为127.5 - label_padding_value (int): 标注图像padding的值。默认值为255。 - - Raises: - TypeError: crop_size不是int/list/tuple。 - ValueError: target_size为list/tuple时元素个数不等于2。 - """ - - def __init__(self, - crop_size=512, - im_padding_value=127.5, - label_padding_value=255): - if isinstance(crop_size, list) or isinstance(crop_size, tuple): - if len(crop_size) != 2: - raise ValueError( - 'when crop_size is list or tuple, it should include 2 elements, but it is {}' - .format(crop_size)) - elif not isinstance(crop_size, int): - raise TypeError( - "Type of crop_size is invalid. 
Must be Integer or List or tuple, now is {}" - .format(type(crop_size))) - self.crop_size = crop_size - self.im_padding_value = im_padding_value - self.label_padding_value = label_padding_value - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if isinstance(self.crop_size, int): - crop_width = self.crop_size - crop_height = self.crop_size - else: - crop_width = self.crop_size[0] - crop_height = self.crop_size[1] - - img_height = im.shape[0] - img_width = im.shape[1] - - if img_height == crop_height and img_width == crop_width: - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - else: - pad_height = max(crop_height - img_height, 0) - pad_width = max(crop_width - img_width, 0) - if (pad_height > 0 or pad_width > 0): - im = np.pad( - im, - pad_width=((0, pad_height), (0, pad_width), (0, 0)), - mode='constant', - constant_values=(self.im_padding_value, - self.im_padding_value)) - if label is not None: - label = np.pad( - label, - pad_width=((0, pad_height), (0, pad_width)), - mode='constant', - constant_values=(self.label_padding_value, - self.label_padding_value)) - img_height = im.shape[0] - img_width = im.shape[1] - - if crop_height > 0 and crop_width > 0: - h_off = np.random.randint(img_height - crop_height + 1) - w_off = np.random.randint(img_width - crop_width + 1) - - im = im[h_off:(crop_height + h_off), w_off:( - w_off + crop_width), :] - if label is not None: - label = label[h_off:(crop_height + h_off), w_off:( - w_off + crop_width)] - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomBlur: - """以一定的概率对图像进行高斯模糊。 - - Args: - prob (float): 图像模糊概率。默认为0.1。 - """ - - def __init__(self, prob=0.1): - self.prob = prob - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.prob <= 0: - n = 0 - elif self.prob >= 1: - n = 1 - else: - n = int(1.0 / self.prob) - if n > 0: - if np.random.randint(0, n) == 0: - radius = np.random.randint(3, 10) - if radius % 2 != 1: - radius = radius + 1 - if radius > 9: - radius = 9 - im = cv2.GaussianBlur(im, (radius, radius), 0, 0) - - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class RandomScaleAspect: - """裁剪并resize回原始尺寸的图像和标注图像。 - 按照一定的面积比和宽高比对图像进行裁剪,并reszie回原始图像的图像,当存在标注图时,同步进行。 - - Args: - min_scale (float):裁取图像占原始图像的面积比,0-1,默认0返回原图。默认为0.5。 - aspect_ratio (float): 裁取图像的宽高比范围,非负,默认0返回原图。默认为0.33。 - """ - - def __init__(self, min_scale=0.5, aspect_ratio=0.33): - self.min_scale = min_scale - self.aspect_ratio = aspect_ratio - - def __call__(self, im, im_info=None, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当label为空时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当label不为空时,返回的tuple为(im, im_info, label),分别对应图像np.ndarray数据、 - 存储与图像相关信息的字典和标注图像np.ndarray数据。 - """ - if self.min_scale != 0 and self.aspect_ratio 
!= 0: - img_height = im.shape[0] - img_width = im.shape[1] - for i in range(0, 10): - area = img_height * img_width - target_area = area * np.random.uniform(self.min_scale, 1.0) - aspectRatio = np.random.uniform(self.aspect_ratio, - 1.0 / self.aspect_ratio) - - dw = int(np.sqrt(target_area * 1.0 * aspectRatio)) - dh = int(np.sqrt(target_area * 1.0 / aspectRatio)) - if (np.random.randint(10) < 5): - tmp = dw - dw = dh - dh = tmp - - if (dh < img_height and dw < img_width): - h1 = np.random.randint(0, img_height - dh) - w1 = np.random.randint(0, img_width - dw) - - im = im[h1:(h1 + dh), w1:(w1 + dw), :] - label = label[h1:(h1 + dh), w1:(w1 + dw)] - im = cv2.resize( - im, (img_width, img_height), - interpolation=cv2.INTER_LINEAR) - label = cv2.resize( - label, (img_width, img_height), - interpolation=cv2.INTER_NEAREST) - break - if label is None: - return (im, im_info) - else: - return (im, im_info, label) - - -class ArrangeSegmenter: - """获取训练/验证/预测所需的信息。 - - Args: - mode (str): 指定数据用于何种用途,取值范围为['train', 'eval', 'test', 'quant']。 - - Raises: - ValueError: mode的取值不在['train', 'eval', 'test', 'quant']之内 - """ - - def __init__(self, mode): - if mode not in ['train', 'eval', 'test', 'quant']: - raise ValueError( - "mode should be defined as one of ['train', 'eval', 'test', 'quant']!" - ) - self.mode = mode - - def __call__(self, im, im_info, label=None): - """ - Args: - im (np.ndarray): 图像np.ndarray数据。 - im_info (dict): 存储与图像相关的信息。 - label (np.ndarray): 标注图像np.ndarray数据。 - - Returns: - tuple: 当mode为'train'或'eval'时,返回的tuple为(im, label),分别对应图像np.ndarray数据、存储与图像相关信息的字典; - 当mode为'test'时,返回的tuple为(im, im_info),分别对应图像np.ndarray数据、存储与图像相关信息的字典;当mode为 - 'quant'时,返回的tuple为(im,),为图像np.ndarray数据。 - """ - im = permute(im, False) - if self.mode == 'train' or self.mode == 'eval': - label = label[np.newaxis, :, :] - return (im, label) - elif self.mode == 'test': - return (im, im_info) - else: - return (im, ) diff --git a/legacy/contrib/RemoteSensing/utils/__init__.py b/legacy/contrib/RemoteSensing/utils/__init__.py deleted file mode 100644 index 7a4a811281..0000000000 --- a/legacy/contrib/RemoteSensing/utils/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from . import logging -from . import utils -from .metrics import ConfusionMatrix -from .utils import * diff --git a/legacy/contrib/RemoteSensing/utils/logging.py b/legacy/contrib/RemoteSensing/utils/logging.py deleted file mode 100644 index 16670ca1d5..0000000000 --- a/legacy/contrib/RemoteSensing/utils/logging.py +++ /dev/null @@ -1,45 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
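# A minimal sketch of how the transform classes above (Clip, Normalize, Padding,
# RandomPaddingCrop, RandomBlur, RandomScaleAspect, ArrangeSegmenter) chain
# together at inference time. Each __call__ returns (im, im_info[, label]), so
# the tuple from one transform can be unpacked straight into the next; the
# particular transform list here is an assumed example, not a pipeline defined
# in this repository.
#
#   transforms = [Normalize(), Padding(target_size=512),
#                 ArrangeSegmenter(mode='test')]
#   outputs = (im,)                       # im: HWC image as np.ndarray
#   for op in transforms:
#       outputs = op(*outputs)
#   im_chw, im_info = outputs             # 'test' mode yields (CHW image, info dict)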
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import time -import os -import sys - -levels = {0: 'ERROR', 1: 'WARNING', 2: 'INFO', 3: 'DEBUG'} - - -def log(level=2, message=""): - current_time = time.time() - time_array = time.localtime(current_time) - current_time = time.strftime("%Y-%m-%d %H:%M:%S", time_array) - print("{} [{}]\t{}".format(current_time, levels[level], - message).encode("utf-8").decode("latin1")) - sys.stdout.flush() - - -def debug(message=""): - log(level=3, message=message) - - -def info(message=""): - log(level=2, message=message) - - -def warning(message=""): - log(level=1, message=message) - - -def error(message=""): - log(level=0, message=message) diff --git a/legacy/contrib/RemoteSensing/utils/metrics.py b/legacy/contrib/RemoteSensing/utils/metrics.py deleted file mode 100644 index 80df6c5df1..0000000000 --- a/legacy/contrib/RemoteSensing/utils/metrics.py +++ /dev/null @@ -1,156 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
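# The ConfusionMatrix class below scatters the (label, pred) pairs of all
# unmasked pixels into a sparse num_classes x num_classes matrix and adds it to
# self.confusion_matrix. With streaming=False the matrix is zeroed on every
# calculate() call, so each call scores a single batch; with streaming=True it
# accumulates across calls for dataset-level metrics. mean_iou(), accuracy(),
# kappa() and precision_recall() all read from the accumulated matrix, so they
# can be queried at any point during evaluation.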
- -import os -import sys -import numpy as np -from scipy.sparse import csr_matrix - - -class ConfusionMatrix(object): - """ - Confusion Matrix for segmentation evaluation - """ - - def __init__(self, num_classes=2, streaming=False): - self.confusion_matrix = np.zeros([num_classes, num_classes], - dtype='int64') - self.num_classes = num_classes - self.streaming = streaming - - def calculate(self, pred, label, ignore=None): - # If not in streaming mode, clear matrix everytime when call `calculate` - if not self.streaming: - self.zero_matrix() - - label = np.transpose(label, (0, 2, 3, 1)) - ignore = np.transpose(ignore, (0, 2, 3, 1)) - mask = np.array(ignore) == 1 - - label = np.asarray(label)[mask] - pred = np.asarray(pred)[mask] - one = np.ones_like(pred) - # Accumuate ([row=label, col=pred], 1) into sparse matrix - spm = csr_matrix((one, (label, pred)), - shape=(self.num_classes, self.num_classes)) - spm = spm.todense() - self.confusion_matrix += spm - - def zero_matrix(self): - """ Clear confusion matrix """ - self.confusion_matrix = np.zeros([self.num_classes, self.num_classes], - dtype='int64') - - def mean_iou(self): - iou_list = [] - avg_iou = 0 - # TODO: use numpy sum axis api to simpliy - vji = np.zeros(self.num_classes, dtype=int) - vij = np.zeros(self.num_classes, dtype=int) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - for c in range(self.num_classes): - total = vji[c] + vij[c] - self.confusion_matrix[c][c] - if total == 0: - iou = 0 - else: - iou = float(self.confusion_matrix[c][c]) / total - avg_iou += iou - iou_list.append(iou) - avg_iou = float(avg_iou) / float(self.num_classes) - return np.array(iou_list), avg_iou - - def accuracy(self): - total = self.confusion_matrix.sum() - total_right = 0 - for c in range(self.num_classes): - total_right += self.confusion_matrix[c][c] - if total == 0: - avg_acc = 0 - else: - avg_acc = float(total_right) / total - - vij = np.zeros(self.num_classes, dtype=int) - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - acc_list = [] - for c in range(self.num_classes): - if vij[c] == 0: - acc = 0 - else: - acc = self.confusion_matrix[c][c] / float(vij[c]) - acc_list.append(acc) - return np.array(acc_list), avg_acc - - def kappa(self): - vji = np.zeros(self.num_classes) - vij = np.zeros(self.num_classes) - for j in range(self.num_classes): - v_j = 0 - for i in range(self.num_classes): - v_j += self.confusion_matrix[j][i] - vji[j] = v_j - - for i in range(self.num_classes): - v_i = 0 - for j in range(self.num_classes): - v_i += self.confusion_matrix[j][i] - vij[i] = v_i - - total = self.confusion_matrix.sum() - - # avoid spillovers - # TODO: is it reasonable to hard code 10000.0? 
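# The hard-coded 1/10000 rescaling below is applied uniformly to the row sums
# (vji), the column sums (vij) and the grand total, so it cancels out in
# pe = sum_c(vji[c] * vij[c]) / total**2 and in po = trace / total; its only
# effect is to keep the intermediate products from overflowing before Cohen's
# kappa = (po - pe) / (1 - pe) is formed, which appears to be what the
# "avoid spillovers" comment refers to.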
- total = float(total) / 10000.0 - vji = vji / 10000.0 - vij = vij / 10000.0 - - tp = 0 - tc = 0 - for c in range(self.num_classes): - tp += vji[c] * vij[c] - tc += self.confusion_matrix[c][c] - - tc = tc / 10000.0 - pe = tp / (total * total) - po = tc / total - - kappa = (po - pe) / (1 - pe) - return kappa - - def precision_recall(self): - ''' - precision, recall of foreground(value=1) for 2 categories - ''' - TP = self.confusion_matrix[1, 1] - FN = self.confusion_matrix[1, 0] - FP = self.confusion_matrix[0, 1] - recall = TP / (TP + FN) - precision = TP / (TP + FP) - return precision, recall diff --git a/legacy/contrib/RemoteSensing/utils/paddle_utils.py b/legacy/contrib/RemoteSensing/utils/paddle_utils.py deleted file mode 100644 index b30d46ffe7..0000000000 --- a/legacy/contrib/RemoteSensing/utils/paddle_utils.py +++ /dev/null @@ -1,28 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle - - -def enable_static(): - if hasattr(paddle, 'enable_static'): - paddle.enable_static() - - -def save_op_version_info(program_desc): - if hasattr(paddle.fluid.core, 'save_op_version_info'): - paddle.fluid.core.save_op_version_info(program_desc) - else: - paddle.fluid.core.save_op_compatible_info(program_desc) diff --git a/legacy/contrib/RemoteSensing/utils/pretrain_weights.py b/legacy/contrib/RemoteSensing/utils/pretrain_weights.py deleted file mode 100644 index 2e5397c9af..0000000000 --- a/legacy/contrib/RemoteSensing/utils/pretrain_weights.py +++ /dev/null @@ -1,26 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os.path as osp - - -def get_pretrain_weights(flag, backbone, save_dir): - if flag is None: - return None - elif osp.isdir(flag): - return flag - else: - raise Exception( - "pretrain_weights need to be defined as directory path.") diff --git a/legacy/contrib/RemoteSensing/utils/utils.py b/legacy/contrib/RemoteSensing/utils/utils.py deleted file mode 100644 index d39a43e79d..0000000000 --- a/legacy/contrib/RemoteSensing/utils/utils.py +++ /dev/null @@ -1,216 +0,0 @@ -# coding: utf8 -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import os.path as osp -import numpy as np -import six -import math -from . import logging - - -def seconds_to_hms(seconds): - h = math.floor(seconds / 3600) - m = math.floor((seconds - h * 3600) / 60) - s = int(seconds - h * 3600 - m * 60) - hms_str = "{}:{}:{}".format(h, m, s) - return hms_str - - -def setting_environ_flags(): - if 'FLAGS_eager_delete_tensor_gb' not in os.environ: - os.environ['FLAGS_eager_delete_tensor_gb'] = '0.0' - if 'FLAGS_allocator_strategy' not in os.environ: - os.environ['FLAGS_allocator_strategy'] = 'auto_growth' - if "CUDA_VISIBLE_DEVICES" in os.environ: - if os.environ["CUDA_VISIBLE_DEVICES"].count("-1") > 0: - os.environ["CUDA_VISIBLE_DEVICES"] = "" - - -def get_environ_info(): - setting_environ_flags() - import paddle.fluid as fluid - info = dict() - info['place'] = 'cpu' - info['num'] = int(os.environ.get('CPU_NUM', 1)) - if os.environ.get('CUDA_VISIBLE_DEVICES', None) != "": - if hasattr(fluid.core, 'get_cuda_device_count'): - gpu_num = 0 - try: - gpu_num = fluid.core.get_cuda_device_count() - except: - os.environ['CUDA_VISIBLE_DEVICES'] = '' - pass - if gpu_num > 0: - info['place'] = 'cuda' - info['num'] = fluid.core.get_cuda_device_count() - return info - - -def parse_param_file(param_file, return_shape=True): - from paddle.fluid.proto.framework_pb2 import VarType - f = open(param_file, 'rb') - version = np.fromstring(f.read(4), dtype='int32') - lod_level = np.fromstring(f.read(8), dtype='int64') - for i in range(int(lod_level)): - _size = np.fromstring(f.read(8), dtype='int64') - _ = f.read(_size) - version = np.fromstring(f.read(4), dtype='int32') - tensor_desc = VarType.TensorDesc() - tensor_desc_size = np.fromstring(f.read(4), dtype='int32') - tensor_desc.ParseFromString(f.read(int(tensor_desc_size))) - tensor_shape = tuple(tensor_desc.dims) - if return_shape: - f.close() - return tuple(tensor_desc.dims) - if tensor_desc.data_type != 5: - raise Exception( - "Unexpected data type while parse {}".format(param_file)) - data_size = 4 - for i in range(len(tensor_shape)): - data_size *= tensor_shape[i] - weight = np.fromstring(f.read(data_size), dtype='float32') - f.close() - return np.reshape(weight, tensor_shape) - - -def fuse_bn_weights(exe, main_prog, weights_dir): - import paddle.fluid as fluid - logging.info("Try to fuse weights of batch_norm...") - bn_vars = list() - for block in main_prog.blocks: - ops = list(block.ops) - for op in ops: - if op.type == 'affine_channel': - scale_name = op.input('Scale')[0] - bias_name = op.input('Bias')[0] - prefix = scale_name[:-5] - mean_name = prefix + 'mean' - variance_name = prefix + 'variance' - if not osp.exists(osp.join( - weights_dir, mean_name)) or not osp.exists( - osp.join(weights_dir, variance_name)): - logging.info( - "There's no batch_norm weight found to fuse, skip fuse_bn." 
- ) - return - - bias = block.var(bias_name) - pretrained_shape = parse_param_file( - osp.join(weights_dir, bias_name)) - actual_shape = tuple(bias.shape) - if pretrained_shape != actual_shape: - continue - bn_vars.append( - [scale_name, bias_name, mean_name, variance_name]) - eps = 1e-5 - for names in bn_vars: - scale_name, bias_name, mean_name, variance_name = names - scale = parse_param_file( - osp.join(weights_dir, scale_name), return_shape=False) - bias = parse_param_file( - osp.join(weights_dir, bias_name), return_shape=False) - mean = parse_param_file( - osp.join(weights_dir, mean_name), return_shape=False) - variance = parse_param_file( - osp.join(weights_dir, variance_name), return_shape=False) - bn_std = np.sqrt(np.add(variance, eps)) - new_scale = np.float32(np.divide(scale, bn_std)) - new_bias = bias - mean * new_scale - scale_tensor = fluid.global_scope().find_var(scale_name).get_tensor() - bias_tensor = fluid.global_scope().find_var(bias_name).get_tensor() - scale_tensor.set(new_scale, exe.place) - bias_tensor.set(new_bias, exe.place) - if len(bn_vars) == 0: - logging.info( - "There's no batch_norm weight found to fuse, skip fuse_bn.") - else: - logging.info("There's {} batch_norm ops been fused.".format( - len(bn_vars))) - - -def load_pdparams(exe, main_prog, model_dir): - import paddle.fluid as fluid - from paddle.fluid.proto.framework_pb2 import VarType - from paddle.fluid.framework import Program - - vars_to_load = list() - import pickle - with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f: - params_dict = pickle.load(f) if six.PY2 else pickle.load( - f, encoding='latin1') - unused_vars = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if var.name not in params_dict: - raise Exception("{} is not in saved model".format(var.name)) - if var.shape != params_dict[var.name].shape: - unused_vars.append(var.name) - logging.warning( - "[SKIP] Shape of pretrained weight {} doesn't match.(Pretrained: {}, Actual: {})" - .format(var.name, params_dict[var.name].shape, var.shape)) - continue - vars_to_load.append(var) - logging.debug("Weight {} will be load".format(var.name)) - for var_name in unused_vars: - del params_dict[var_name] - fluid.io.set_program_state(main_prog, params_dict) - - if len(vars_to_load) == 0: - logging.warning( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" 
- ) - else: - logging.info("There are {} varaibles in {} are loaded.".format( - len(vars_to_load), model_dir)) - - -def load_pretrain_weights(exe, main_prog, weights_dir, fuse_bn=False): - if not osp.exists(weights_dir): - raise Exception("Path {} not exists.".format(weights_dir)) - if osp.exists(osp.join(weights_dir, "model.pdparams")): - return load_pdparams(exe, main_prog, weights_dir) - import paddle.fluid as fluid - vars_to_load = list() - for var in main_prog.list_vars(): - if not isinstance(var, fluid.framework.Parameter): - continue - if not osp.exists(osp.join(weights_dir, var.name)): - logging.debug("[SKIP] Pretrained weight {}/{} doesn't exist".format( - weights_dir, var.name)) - continue - pretrained_shape = parse_param_file(osp.join(weights_dir, var.name)) - actual_shape = tuple(var.shape) - if pretrained_shape != actual_shape: - logging.warning( - "[SKIP] Shape of pretrained weight {}/{} doesn't match.(Pretrained: {}, Actual: {})" - .format(weights_dir, var.name, pretrained_shape, actual_shape)) - continue - vars_to_load.append(var) - logging.debug("Weight {} will be load".format(var.name)) - - params_dict = fluid.io.load_program_state( - weights_dir, var_list=vars_to_load) - fluid.io.set_program_state(main_prog, params_dict) - if len(vars_to_load) == 0: - logging.warning( - "There is no pretrain weights loaded, maybe you should check you pretrain model!" - ) - else: - logging.info("There are {} varaibles in {} are loaded.".format( - len(vars_to_load), weights_dir)) - if fuse_bn: - fuse_bn_weights(exe, main_prog, weights_dir) diff --git a/legacy/contrib/RemoteSensing/visualize_demo.py b/legacy/contrib/RemoteSensing/visualize_demo.py deleted file mode 100644 index fbc7a39cd3..0000000000 --- a/legacy/contrib/RemoteSensing/visualize_demo.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import os.path as osp -import argparse -from PIL import Image as Image -from models.utils import visualize as vis - - -def parse_args(): - parser = argparse.ArgumentParser(description='RemoteSensing visualization') - parser.add_argument( - '--data_dir', - dest='data_dir', - help='Dataset directory', - default=None, - type=str) - parser.add_argument( - '--file_list', - dest='file_list', - help='The name of file list that need to be visualized', - default=None, - type=str) - parser.add_argument( - '--pred_dir', - dest='pred_dir', - help='Directory for predict results', - default=None, - type=str) - parser.add_argument( - '--save_dir', - dest='save_dir', - help='Save directory for visual results', - default=None, - type=str) - return parser.parse_args() - - -args = parse_args() -data_dir = args.data_dir -pred_dir = args.pred_dir -save_dir = args.save_dir -file_list = osp.join(data_dir, args.file_list) -if not osp.exists(save_dir): - os.mkdir(save_dir) - -with open(file_list) as f: - lines = f.readlines() - for line in lines: - img_list = [] - - img_line = line.split(' ')[0] - img_name = osp.basename(img_line).replace('data.tif', 'photo.png') - img_path = osp.join(data_dir, 'data_vis', img_name) - img = Image.open(img_path) - img_list.append(img) - print('visualizing {}'.format(img_path)) - - gt_line = line.split(' ')[1].rstrip('\n') - gt_path = osp.join(data_dir, gt_line) - gt_pil = Image.open(gt_path) - img_list.append(gt_pil) - - pred_name = osp.basename(img_line).replace('tif', 'png') - pred_path = osp.join(pred_dir, pred_name) - pred_pil = Image.open(pred_path) - img_list.append(pred_pil) - - save_path = osp.join(save_dir, pred_name) - vis.splice_imgs(img_list, save_path) - print('saved in 
{}'.format(save_path)) diff --git a/legacy/contrib/SpatialEmbeddings/README.md b/legacy/contrib/SpatialEmbeddings/README.md deleted file mode 100644 index cd3b6b0139..0000000000 --- a/legacy/contrib/SpatialEmbeddings/README.md +++ /dev/null @@ -1,63 +0,0 @@ -# SpatialEmbeddings - -## 模型概述 -本模型是基于proposal-free的实例分割模型,快速实时,同时准确率高,适用于自动驾驶等实时场景。 - -本模型基于KITTI中MOTS数据集训练得到,是论文 Segment as Points for Efficient Online Multi-Object Tracking and Segmentation中的分割部分 -[论文地址](https://arxiv.org/pdf/2007.01550.pdf) - -## KITTI MOTS指标 -KITTI MOTS验证集AP:0.76, AP_50%:0.915 - -## 代码使用说明 - -### 1. 模型下载 - -执行以下命令下载并解压SpatialEmbeddings预测模型: - -``` -python download_SpatialEmbeddings_kitti.py -``` - -或点击[链接](https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar)进行手动下载并解压。 - -### 2. 数据下载 - -前往KITTI官网下载MOTS比赛数据[链接](https://www.vision.rwth-aachen.de/page/mots) - -下载后解压到./data文件夹下, 并生成验证集图片路径的test.txt - -### 3. 快速预测 - -使用GPU预测 -``` -python -u infer.py --use_gpu -``` - -使用CPU预测: -``` -python -u infer.py -``` -数据及模型路径等详细配置见config.py文件 - -#### 4. 预测结果示例: - - 原图: - - ![](imgs/kitti_0007_000518_ori.png) - - 预测结果: - - ![](imgs/kitti_0007_000518_pred.png) - - - -## 引用 - -**论文** - -*Instance Segmentation by Jointly Optimizing Spatial Embeddings and Clustering Bandwidth* - -**代码** - -https://github.com/davyneven/SpatialEmbeddings diff --git a/legacy/contrib/SpatialEmbeddings/config.py b/legacy/contrib/SpatialEmbeddings/config.py deleted file mode 100644 index 844b729e38..0000000000 --- a/legacy/contrib/SpatialEmbeddings/config.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -from utils.util import AttrDict, merge_cfg_from_args, get_arguments -import os - -args = get_arguments() -cfg = AttrDict() - -# 待预测图像所在路径 -cfg.data_dir = "data" -# 待预测图像名称列表 -cfg.data_list_file = os.path.join("data", "test.txt") -# 模型加载路径 -cfg.model_path = 'SpatialEmbeddings_kitti' -# 预测结果保存路径 -cfg.vis_dir = "result" - -# 待预测图像输入尺寸 -cfg.input_size = (384, 1248) -# sigma值 -cfg.n_sigma = 2 -# 中心点阈值 -cfg.threshold = 0.94 -# 点集数阈值 -cfg.min_pixel = 160 - -merge_cfg_from_args(args, cfg) diff --git a/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000512.png b/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000512.png deleted file mode 100755 index 672a3b9ee5..0000000000 Binary files a/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000512.png and /dev/null differ diff --git a/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000518.png b/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000518.png deleted file mode 100755 index bee7d3af3b..0000000000 Binary files a/legacy/contrib/SpatialEmbeddings/data/kitti/0007/kitti_0007_000518.png and /dev/null differ diff --git a/legacy/contrib/SpatialEmbeddings/data/test.txt b/legacy/contrib/SpatialEmbeddings/data/test.txt deleted file mode 100644 index 47e7e3865c..0000000000 --- a/legacy/contrib/SpatialEmbeddings/data/test.txt +++ /dev/null @@ -1,2 +0,0 @@ -kitti/0007/kitti_0007_000512.png -kitti/0007/kitti_0007_000518.png diff --git a/legacy/contrib/SpatialEmbeddings/download_SpatialEmbeddings_kitti.py b/legacy/contrib/SpatialEmbeddings/download_SpatialEmbeddings_kitti.py deleted file mode 100644 index 82b53d8220..0000000000 --- a/legacy/contrib/SpatialEmbeddings/download_SpatialEmbeddings_kitti.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - -if __name__ == "__main__": - download_file_and_uncompress( - url='https://paddleseg.bj.bcebos.com/models/SpatialEmbeddings_kitti.tar', - savepath=LOCAL_PATH, - extrapath=LOCAL_PATH, - extraname='SpatialEmbeddings_kitti') - - print("Pretrained Model download success!") diff --git a/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_ori.png b/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_ori.png deleted file mode 100755 index bee7d3af3b..0000000000 Binary files a/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_ori.png and /dev/null differ diff --git a/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_pred.png b/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_pred.png deleted file mode 100644 index 96489deed4..0000000000 Binary files a/legacy/contrib/SpatialEmbeddings/imgs/kitti_0007_000518_pred.png and /dev/null differ diff --git a/legacy/contrib/SpatialEmbeddings/infer.py b/legacy/contrib/SpatialEmbeddings/infer.py deleted file mode 100644 index 9b6c4db891..0000000000 --- a/legacy/contrib/SpatialEmbeddings/infer.py +++ /dev/null @@ -1,135 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import numpy as np -from utils.util import get_arguments -from utils.palette import get_palette -from utils.data_util import Cluster, pad_img -from PIL import Image as PILImage -import importlib -import paddle.fluid as fluid -from models import SpatialEmbeddings - -args = get_arguments() -config = importlib.import_module('config') -cfg = getattr(config, 'cfg') - -cluster = Cluster() - - -# 预测数据集类 -class TestDataSet(): - def __init__(self): - self.data_dir = cfg.data_dir - self.data_list_file = cfg.data_list_file - self.data_list = self.get_data_list() - self.data_num = len(self.data_list) - - def get_data_list(self): - # 获取预测图像路径列表 - data_list = [] - data_file_handler = open(self.data_list_file, 'r') - for line in data_file_handler: - img_name = line.strip() - name_prefix = img_name.split('.')[0] - if len(img_name.split('.')) == 1: - img_name = img_name + '.jpg' - img_path = os.path.join(self.data_dir, img_name) - data_list.append(img_path) - return data_list - - def preprocess(self, img): - # 图像预处理 - h, w = img.shape[:2] - h_new, w_new = cfg.input_size - img = np.pad(img, ((0, h_new - h), (0, w_new - w), (0, 0)), 'edge') - img = img.astype(np.float32) / 255.0 - img = img.transpose((2, 0, 1)) - img = np.expand_dims(img, axis=0) - return img - - def get_data(self, index): - # 获取图像信息 - img_path = self.data_list[index] - img = np.array(PILImage.open(img_path)) - if img is None: - return img, img, img_path, None - - img_name = img_path.split(os.sep)[-1] - name_prefix = img_name.replace('.' 
+ img_name.split('.')[-1], '') - img_shape = img.shape[:2] - img_process = self.preprocess(img) - - return img_process, name_prefix, img_shape - - -def get_model(main_prog, startup_prog): - img_shape = [3, cfg.input_size[0], cfg.input_size[1]] - with fluid.program_guard(main_prog, startup_prog): - with fluid.unique_name.guard(): - input = fluid.layers.data( - name='image', shape=img_shape, dtype='float32') - output = SpatialEmbeddings(input) - return input, output - - -def infer(): - if not os.path.exists(cfg.vis_dir): - os.makedirs(cfg.vis_dir) - - startup_prog = fluid.Program() - test_prog = fluid.Program() - - input, output = get_model(test_prog, startup_prog) - test_prog = test_prog.clone(for_test=True) - - place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_prog) - - if not os.path.exists(cfg.model_path): - raise RuntimeError('No pre-trained model found under path {}'.format( - cfg.model_path)) - - # 加载预测模型 - def if_exist(var): - return os.path.exists(os.path.join(cfg.model_path, var.name)) - - fluid.io.load_vars( - exe, cfg.model_path, main_program=test_prog, predicate=if_exist) - - #加载预测数据集 - test_dataset = TestDataSet() - data_num = test_dataset.data_num - - for idx in range(data_num): - # 数据获取 - image, im_name, im_shape = test_dataset.get_data(idx) - if image is None: - print(im_name, 'is None') - continue - - # 预测 - outputs = exe.run( - program=test_prog, feed={'image': image}, fetch_list=output) - instance_map, predictions = cluster.cluster(outputs[0][0], n_sigma=cfg.n_sigma, \ - min_pixel=cfg.min_pixel, threshold=cfg.threshold) - - # 预测结果保存 - instance_map = pad_img(instance_map, image.shape[2:]) - instance_map = instance_map[:im_shape[0], :im_shape[1]] - output_im = PILImage.fromarray(np.asarray(instance_map, dtype=np.uint8)) - palette = get_palette(len(predictions) + 1) - output_im.putpalette(palette) - result_path = os.path.join(cfg.vis_dir, im_name + '.png') - output_im.save(result_path) - - if (idx + 1) % 100 == 0: - print('%d processd' % (idx + 1)) - - print('%d processd done' % (idx + 1)) - - return 0 - - -if __name__ == "__main__": - infer() diff --git a/legacy/contrib/SpatialEmbeddings/models.py b/legacy/contrib/SpatialEmbeddings/models.py deleted file mode 100644 index 2eb3c8680d..0000000000 --- a/legacy/contrib/SpatialEmbeddings/models.py +++ /dev/null @@ -1,800 +0,0 @@ -from paddle.fluid.initializer import Constant -from paddle.fluid.param_attr import ParamAttr -import paddle.fluid as fluid - - -def conv(input, - num_filters, - filter_size=None, - stride=1, - padding=0, - dilation=1, - act=None, - name='conv'): - return fluid.layers.conv2d( - input, - filter_size=filter_size, - num_filters=num_filters, - stride=stride, - padding=padding, - dilation=dilation, - act=act, - name=name, - param_attr=name + '_weights', - bias_attr=name + '_bias') - - -def conv_transpose(input, - num_filters, - output_size=None, - filter_size=None, - stride=1, - padding=0, - act=None, - name='conv_transpose'): - return fluid.layers.conv2d_transpose( - input, - filter_size=filter_size, - num_filters=num_filters, - stride=stride, - padding=padding, - act=act, - name=name, - param_attr=name + '_weights', - bias_attr=name + '_bias') - - -EPSILON = 0.0010000000474974513 - - -def bn(input, name): - bn_id = name.replace('batch_norm', '') - return fluid.layers.batch_norm( - input, - is_test=True, - epsilon=EPSILON, - param_attr='bn_scale' + bn_id + '_scale', - bias_attr='bn_scale' + bn_id + '_offset', - moving_mean_name=name + '_mean', - 
moving_variance_name=name + '_variance', - name=name) - - -def max_pool(input, pool_size=2, pool_stride=2, name=None): - return fluid.layers.pool2d( - input, - pool_size=pool_size, - pool_stride=pool_stride, - ceil_mode=True, - pool_type='max', - exclusive=False, - name=name) - - -def SpatialEmbeddings(input): - conv1 = conv( - input, filter_size=3, num_filters=13, stride=2, padding=1, name='conv1') - max_pool1 = fluid.layers.pool2d( - input, pool_size=2, pool_stride=2, name='max_pool1') - cat1 = fluid.layers.concat([conv1, max_pool1], axis=1, name='cat1') - bn_scale1 = bn(cat1, name='batch_norm1') - relu1 = fluid.layers.relu(bn_scale1) - conv2 = conv( - relu1, filter_size=3, num_filters=48, stride=2, padding=1, name='conv2') - max_pool2 = fluid.layers.pool2d( - relu1, pool_size=2, pool_stride=2, name='max_pool2') - cat2 = fluid.layers.concat([conv2, max_pool2], axis=1, name='cat2') - bn_scale2 = bn(cat2, name='batch_norm2') - relu2 = fluid.layers.relu(bn_scale2) - relu3 = conv( - relu2, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv3', - act='relu') - conv4 = conv( - relu3, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv4') - bn_scale3 = bn(conv4, name='batch_norm3') - relu4 = fluid.layers.relu(bn_scale3) - relu5 = conv( - relu4, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv5', - act='relu') - conv6 = conv( - relu5, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv6') - bn_scale4 = bn(conv6, name='batch_norm4') - add1 = fluid.layers.elementwise_add(x=bn_scale4, y=relu2, name='add1') - relu6 = fluid.layers.relu(add1) - relu7 = conv( - relu6, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv7', - act='relu') - conv8 = conv( - relu7, filter_size=[1, 3], num_filters=64, padding=[0, 1], name='conv8') - bn_scale5 = bn(conv8, name='batch_norm5') - relu8 = fluid.layers.relu(bn_scale5) - relu9 = conv( - relu8, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv9', - act='relu') - conv10 = conv( - relu9, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv10') - bn_scale6 = bn(conv10, name='batch_norm6') - add2 = fluid.layers.elementwise_add(x=bn_scale6, y=relu6, name='add2') - relu10 = fluid.layers.relu(add2) - relu11 = conv( - relu10, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv11', - act='relu') - conv12 = conv( - relu11, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv12') - bn_scale7 = bn(conv12, name='batch_norm7') - relu12 = fluid.layers.relu(bn_scale7) - relu13 = conv( - relu12, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv13', - act='relu') - conv14 = conv( - relu13, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv14') - bn_scale8 = bn(conv14, name='batch_norm8') - add3 = fluid.layers.elementwise_add(x=bn_scale8, y=relu10, name='add3') - relu14 = fluid.layers.relu(add3) - relu15 = conv( - relu14, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv15', - act='relu') - conv16 = conv( - relu15, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv16') - bn_scale9 = bn(conv16, name='batch_norm9') - relu16 = fluid.layers.relu(bn_scale9) - relu17 = conv( - relu16, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv17', - act='relu') - conv18 = conv( - relu17, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv18') - bn_scale10 = bn(conv18, name='batch_norm10') - add4 = 
fluid.layers.elementwise_add(x=bn_scale10, y=relu14, name='add4') - relu18 = fluid.layers.relu(add4) - relu19 = conv( - relu18, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv19', - act='relu') - conv20 = conv( - relu19, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv20') - bn_scale11 = bn(conv20, name='batch_norm11') - relu20 = fluid.layers.relu(bn_scale11) - relu21 = conv( - relu20, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv21', - act='relu') - conv22 = conv( - relu21, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv22') - bn_scale12 = bn(conv22, name='batch_norm12') - add5 = fluid.layers.elementwise_add(x=bn_scale12, y=relu18, name='add5') - relu22 = fluid.layers.relu(add5) - conv23 = conv( - relu22, - filter_size=3, - num_filters=64, - stride=2, - padding=1, - name='conv23') - max_pool3 = fluid.layers.pool2d( - relu22, pool_size=2, pool_stride=2, name='max_pool3') - cat3 = fluid.layers.concat([conv23, max_pool3], axis=1, name='cat3') - bn_scale13 = bn(cat3, name='batch_norm13') - relu23 = fluid.layers.relu(bn_scale13) - relu24 = conv( - relu23, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv24', - act='relu') - conv25 = conv( - relu24, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv25') - bn_scale14 = bn(conv25, name='batch_norm14') - relu25 = fluid.layers.relu(bn_scale14) - relu26 = conv( - relu25, - filter_size=[3, 1], - num_filters=128, - padding=[2, 0], - dilation=[2, 1], - name='conv26', - act='relu') - conv27 = conv( - relu26, - filter_size=[1, 3], - num_filters=128, - padding=[0, 2], - dilation=[1, 2], - name='conv27') - bn_scale15 = bn(conv27, name='batch_norm15') - add6 = fluid.layers.elementwise_add(x=bn_scale15, y=relu23, name='add6') - relu27 = fluid.layers.relu(add6) - relu28 = conv( - relu27, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv28', - act='relu') - conv29 = conv( - relu28, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv29') - bn_scale16 = bn(conv29, name='batch_norm16') - relu29 = fluid.layers.relu(bn_scale16) - relu30 = conv( - relu29, - filter_size=[3, 1], - num_filters=128, - padding=[4, 0], - dilation=[4, 1], - name='conv30', - act='relu') - conv31 = conv( - relu30, - filter_size=[1, 3], - num_filters=128, - padding=[0, 4], - dilation=[1, 4], - name='conv31') - bn_scale17 = bn(conv31, name='batch_norm17') - add7 = fluid.layers.elementwise_add(x=bn_scale17, y=relu27, name='add7') - relu31 = fluid.layers.relu(add7) - relu32 = conv( - relu31, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv32', - act='relu') - conv33 = conv( - relu32, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv33') - bn_scale18 = bn(conv33, name='batch_norm18') - relu33 = fluid.layers.relu(bn_scale18) - relu34 = conv( - relu33, - filter_size=[3, 1], - num_filters=128, - padding=[8, 0], - dilation=[8, 1], - name='conv34', - act='relu') - conv35 = conv( - relu34, - filter_size=[1, 3], - num_filters=128, - padding=[0, 8], - dilation=[1, 8], - name='conv35') - bn_scale19 = bn(conv35, name='batch_norm19') - add8 = fluid.layers.elementwise_add(x=bn_scale19, y=relu31, name='add8') - relu35 = fluid.layers.relu(add8) - relu36 = conv( - relu35, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv36', - act='relu') - conv37 = conv( - relu36, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv37') - bn_scale20 = bn(conv37, 
name='batch_norm20') - relu37 = fluid.layers.relu(bn_scale20) - relu38 = conv( - relu37, - filter_size=[3, 1], - num_filters=128, - padding=[16, 0], - dilation=[16, 1], - name='conv38', - act='relu') - conv39 = conv( - relu38, - filter_size=[1, 3], - num_filters=128, - padding=[0, 16], - dilation=[1, 16], - name='conv39') - bn_scale21 = bn(conv39, name='batch_norm21') - add9 = fluid.layers.elementwise_add(x=bn_scale21, y=relu35, name='add9') - relu39 = fluid.layers.relu(add9) - relu40 = conv( - relu39, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv40', - act='relu') - conv41 = conv( - relu40, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv41') - bn_scale22 = bn(conv41, name='batch_norm22') - relu41 = fluid.layers.relu(bn_scale22) - relu42 = conv( - relu41, - filter_size=[3, 1], - num_filters=128, - padding=[2, 0], - dilation=[2, 1], - name='conv42', - act='relu') - conv43 = conv( - relu42, - filter_size=[1, 3], - num_filters=128, - padding=[0, 2], - dilation=[1, 2], - name='conv43') - bn_scale23 = bn(conv43, name='batch_norm23') - add10 = fluid.layers.elementwise_add(x=bn_scale23, y=relu39, name='add10') - relu43 = fluid.layers.relu(add10) - relu44 = conv( - relu43, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv44', - act='relu') - conv45 = conv( - relu44, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv45') - bn_scale24 = bn(conv45, name='batch_norm24') - relu45 = fluid.layers.relu(bn_scale24) - relu46 = conv( - relu45, - filter_size=[3, 1], - num_filters=128, - padding=[4, 0], - dilation=[4, 1], - name='conv46', - act='relu') - conv47 = conv( - relu46, - filter_size=[1, 3], - num_filters=128, - padding=[0, 4], - dilation=[1, 4], - name='conv47') - bn_scale25 = bn(conv47, name='batch_norm25') - add11 = fluid.layers.elementwise_add(x=bn_scale25, y=relu43, name='add11') - relu47 = fluid.layers.relu(add11) - relu48 = conv( - relu47, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv48', - act='relu') - conv49 = conv( - relu48, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv49') - bn_scale26 = bn(conv49, name='batch_norm26') - relu49 = fluid.layers.relu(bn_scale26) - relu50 = conv( - relu49, - filter_size=[3, 1], - num_filters=128, - padding=[8, 0], - dilation=[8, 1], - name='conv50', - act='relu') - conv51 = conv( - relu50, - filter_size=[1, 3], - num_filters=128, - padding=[0, 8], - dilation=[1, 8], - name='conv51') - bn_scale27 = bn(conv51, name='batch_norm27') - add12 = fluid.layers.elementwise_add(x=bn_scale27, y=relu47, name='add12') - relu51 = fluid.layers.relu(add12) - relu52 = conv( - relu51, - filter_size=[3, 1], - num_filters=128, - padding=[1, 0], - name='conv52', - act='relu') - conv53 = conv( - relu52, - filter_size=[1, 3], - num_filters=128, - padding=[0, 1], - name='conv53') - bn_scale28 = bn(conv53, name='batch_norm28') - relu53 = fluid.layers.relu(bn_scale28) - relu54 = conv( - relu53, - filter_size=[3, 1], - num_filters=128, - padding=[16, 0], - dilation=[16, 1], - name='conv54', - act='relu') - conv55 = conv( - relu54, - filter_size=[1, 3], - num_filters=128, - padding=[0, 16], - dilation=[1, 16], - name='conv55') - bn_scale29 = bn(conv55, name='batch_norm29') - add13 = fluid.layers.elementwise_add(x=bn_scale29, y=relu51, name='add13') - relu55 = fluid.layers.relu(add13) - conv_transpose1 = conv_transpose( - relu55, - filter_size=3, - num_filters=64, - stride=2, - padding=1, - name='conv_transpose1') - conv_transpose4 = 
conv_transpose( - relu55, - filter_size=3, - num_filters=64, - stride=2, - padding=1, - name='conv_transpose4') - bn_scale30 = bn(conv_transpose1, name='batch_norm30') - bn_scale40 = bn(conv_transpose4, name='batch_norm40') - relu56 = fluid.layers.relu(bn_scale30) - relu74 = fluid.layers.relu(bn_scale40) - relu57 = conv( - relu56, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv56', - act='relu') - relu75 = conv( - relu74, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv72', - act='relu') - conv57 = conv( - relu57, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv57') - conv73 = conv( - relu75, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv73') - bn_scale31 = bn(conv57, name='batch_norm31') - bn_scale41 = bn(conv73, name='batch_norm41') - relu58 = fluid.layers.relu(bn_scale31) - relu76 = fluid.layers.relu(bn_scale41) - relu59 = conv( - relu58, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv58', - act='relu') - relu77 = conv( - relu76, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv74', - act='relu') - conv59 = conv( - relu59, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv59') - conv75 = conv( - relu77, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv75') - bn_scale32 = bn(conv59, name='batch_norm32') - bn_scale42 = bn(conv75, name='batch_norm42') - add14 = fluid.layers.elementwise_add(x=bn_scale32, y=relu56, name='add14') - add18 = fluid.layers.elementwise_add(x=bn_scale42, y=relu74, name='add18') - relu60 = fluid.layers.relu(add14) - relu78 = fluid.layers.relu(add18) - relu61 = conv( - relu60, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv60', - act='relu') - relu79 = conv( - relu78, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv76', - act='relu') - conv61 = conv( - relu61, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv61') - conv77 = conv( - relu79, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv77') - bn_scale33 = bn(conv61, name='batch_norm33') - bn_scale43 = bn(conv77, name='batch_norm43') - relu62 = fluid.layers.relu(bn_scale33) - relu80 = fluid.layers.relu(bn_scale43) - relu63 = conv( - relu62, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv62', - act='relu') - relu81 = conv( - relu80, - filter_size=[3, 1], - num_filters=64, - padding=[1, 0], - name='conv78', - act='relu') - conv63 = conv( - relu63, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv63') - conv79 = conv( - relu81, - filter_size=[1, 3], - num_filters=64, - padding=[0, 1], - name='conv79') - bn_scale34 = bn(conv63, name='batch_norm34') - bn_scale44 = bn(conv79, name='batch_norm44') - add15 = fluid.layers.elementwise_add(x=bn_scale34, y=relu60, name='add15') - add19 = fluid.layers.elementwise_add(x=bn_scale44, y=relu78, name='add19') - relu64 = fluid.layers.relu(add15) - relu82 = fluid.layers.relu(add19) - conv_transpose2 = conv_transpose( - relu64, - filter_size=3, - num_filters=16, - stride=2, - padding=1, - name='conv_transpose2') - conv_transpose5 = conv_transpose( - relu82, - filter_size=3, - num_filters=16, - stride=2, - padding=1, - name='conv_transpose5') - bn_scale35 = bn(conv_transpose2, name='batch_norm35') - bn_scale45 = bn(conv_transpose5, name='batch_norm45') - relu65 = fluid.layers.relu(bn_scale35) - relu83 = fluid.layers.relu(bn_scale45) - relu66 = conv( - relu65, - filter_size=[3, 1], - 
num_filters=16, - padding=[1, 0], - name='conv64', - act='relu') - relu84 = conv( - relu83, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv80', - act='relu') - conv65 = conv( - relu66, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv65') - conv81 = conv( - relu84, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv81') - bn_scale36 = bn(conv65, name='batch_norm36') - bn_scale46 = bn(conv81, name='batch_norm46') - relu67 = fluid.layers.relu(bn_scale36) - relu85 = fluid.layers.relu(bn_scale46) - relu68 = conv( - relu67, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv66', - act='relu') - relu86 = conv( - relu85, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv82', - act='relu') - conv67 = conv( - relu68, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv67') - conv83 = conv( - relu86, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv83') - bn_scale37 = bn(conv67, name='batch_norm37') - bn_scale47 = bn(conv83, name='batch_norm47') - add16 = fluid.layers.elementwise_add(x=bn_scale37, y=relu65, name='add16') - add20 = fluid.layers.elementwise_add(x=bn_scale47, y=relu83, name='add20') - relu69 = fluid.layers.relu(add16) - relu87 = fluid.layers.relu(add20) - relu70 = conv( - relu69, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv68', - act='relu') - relu88 = conv( - relu87, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv84', - act='relu') - conv69 = conv( - relu70, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv69') - conv85 = conv( - relu88, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv85') - bn_scale38 = bn(conv69, name='batch_norm38') - bn_scale48 = bn(conv85, name='batch_norm48') - relu71 = fluid.layers.relu(bn_scale38) - relu89 = fluid.layers.relu(bn_scale48) - relu72 = conv( - relu71, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv70', - act='relu') - relu90 = conv( - relu89, - filter_size=[3, 1], - num_filters=16, - padding=[1, 0], - name='conv86', - act='relu') - conv71 = conv( - relu72, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv71') - conv87 = conv( - relu90, - filter_size=[1, 3], - num_filters=16, - padding=[0, 1], - name='conv87') - bn_scale39 = bn(conv71, name='batch_norm39') - bn_scale49 = bn(conv87, name='batch_norm49') - add17 = fluid.layers.elementwise_add(x=bn_scale39, y=relu69, name='add17') - add21 = fluid.layers.elementwise_add(x=bn_scale49, y=relu87, name='add21') - relu73 = fluid.layers.relu(add17) - relu91 = fluid.layers.relu(add21) - conv_transpose3 = conv_transpose( - relu73, filter_size=2, num_filters=4, stride=2, name='conv_transpose3') - conv_transpose6 = conv_transpose( - relu91, filter_size=2, num_filters=1, stride=2, name='conv_transpose6') - cat4 = fluid.layers.concat([conv_transpose3, conv_transpose6], - axis=1, - name='cat4') - - return cat4 diff --git a/legacy/contrib/SpatialEmbeddings/utils/__init__.py b/legacy/contrib/SpatialEmbeddings/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/legacy/contrib/SpatialEmbeddings/utils/data_util.py b/legacy/contrib/SpatialEmbeddings/utils/data_util.py deleted file mode 100644 index 54082767b2..0000000000 --- a/legacy/contrib/SpatialEmbeddings/utils/data_util.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import 
print_function -import os -import numpy as np -from PIL import Image as PILImage - - -def sigmoid_np(x): - return 1 / (1 + np.exp(-x)) - - -class Cluster: - def __init__(self, ): - xm = np.repeat( - np.linspace(0, 2, 2048)[np.newaxis, np.newaxis, :], 1024, axis=1) - ym = np.repeat( - np.linspace(0, 1, 1024)[np.newaxis, :, np.newaxis], 2048, axis=2) - self.xym = np.vstack((xm, ym)) - - def cluster(self, prediction, n_sigma=1, min_pixel=160, threshold=0.5): - - height, width = prediction.shape[1:3] - xym_s = self.xym[:, 0:height, 0:width] - - spatial_emb = np.tanh(prediction[0:2]) + xym_s - sigma = prediction[2:2 + n_sigma] - seed_map = sigmoid_np(prediction[2 + n_sigma:2 + n_sigma + 1]) - - instance_map = np.zeros((height, width), np.float32) - instances = [] - count = 1 - mask = seed_map > 0.5 - - if mask.sum() > min_pixel: - spatial_emb_masked = spatial_emb[np.repeat(mask, \ - spatial_emb.shape[0], 0)].reshape(2, -1) - sigma_masked = sigma[np.repeat(mask, n_sigma, 0)].reshape( - n_sigma, -1) - seed_map_masked = seed_map[mask].reshape(1, -1) - - unclustered = np.ones(mask.sum(), np.float32) - instance_map_masked = np.zeros(mask.sum(), np.float32) - - while (unclustered.sum() > min_pixel): - - seed = (seed_map_masked * unclustered).argmax().item() - seed_score = (seed_map_masked * unclustered).max().item() - if seed_score < threshold: - break - center = spatial_emb_masked[:, seed:seed + 1] - unclustered[seed] = 0 - s = np.exp(sigma_masked[:, seed:seed + 1] * 10) - dist = np.exp(-1 * np.sum( - (spatial_emb_masked - center)**2 * s, 0)) - proposal = (dist > 0.5).squeeze() - - if proposal.sum() > min_pixel: - if unclustered[proposal].sum() / proposal.sum() > 0.5: - instance_map_masked[proposal.squeeze()] = count - instance_mask = np.zeros((height, width), np.float32) - instance_mask[mask.squeeze()] = proposal - instances.append( - {'mask': (instance_mask.squeeze()*255).astype(np.uint8), \ - 'score': seed_score}) - count += 1 - - unclustered[proposal] = 0 - - instance_map[mask.squeeze()] = instance_map_masked - - return instance_map, instances - - -def pad_img(img, dst_shape, mode='constant'): - img_h, img_w = img.shape[:2] - dst_h, dst_w = dst_shape - pad_shape = ((0, max(0, dst_h - img_h)), (0, max(0, dst_w - img_w))) - return np.pad(img, pad_shape, mode) - - -def save_for_eval(predictions, infer_shape, im_shape, vis_dir, im_name): - txt_file = os.path.join(vis_dir, im_name + '.txt') - with open(txt_file, 'w') as f: - for id, pred in enumerate(predictions): - save_name = im_name + '_{:02d}.png'.format(id) - pred_mask = pad_img(pred['mask'], infer_shape) - pred_mask = pred_mask[:im_shape[0], :im_shape[1]] - im = PILImage.fromarray(pred_mask) - im.save(os.path.join(vis_dir, save_name)) - cl = 26 - score = pred['score'] - f.writelines("{} {} {:.02f}\n".format(save_name, cl, score)) diff --git a/legacy/contrib/SpatialEmbeddings/utils/palette.py b/legacy/contrib/SpatialEmbeddings/utils/palette.py deleted file mode 100644 index 2186203cbc..0000000000 --- a/legacy/contrib/SpatialEmbeddings/utils/palette.py +++ /dev/null @@ -1,38 +0,0 @@ -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -## Created by: RainbowSecret -## Microsoft Research -## yuyua@microsoft.com -## Copyright (c) 2018 -## -## This source code is licensed under the MIT-style license found in the -## LICENSE file in the root directory of this source tree -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -from __future__ import absolute_import -from __future__ import division -from 
__future__ import print_function -import numpy as np -import cv2 - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. - Args: - num_cls: Number of classes - Returns: - The color map - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette diff --git a/legacy/contrib/SpatialEmbeddings/utils/util.py b/legacy/contrib/SpatialEmbeddings/utils/util.py deleted file mode 100644 index bb9683ef1f..0000000000 --- a/legacy/contrib/SpatialEmbeddings/utils/util.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -import argparse -import os - - -def get_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--use_gpu", action="store_true", help="Use gpu or cpu to test.") - parser.add_argument( - '--example', type=str, help='RoadLine, HumanSeg or ACE2P') - - return parser.parse_args() - - -class AttrDict(dict): - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - - def __getattr__(self, name): - if name in self.__dict__: - return self.__dict__[name] - elif name in self: - return self[name] - else: - raise AttributeError(name) - - def __setattr__(self, name, value): - if name in self.__dict__: - self.__dict__[name] = value - else: - self[name] = value - - -def merge_cfg_from_args(args, cfg): - """Merge config keys, values in args into the global config.""" - for k, v in vars(args).items(): - d = cfg - try: - value = eval(v) - except: - value = v - if value is not None: - cfg[k] = value diff --git a/legacy/dataset/README.md b/legacy/dataset/README.md deleted file mode 100644 index 497934e295..0000000000 --- a/legacy/dataset/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# 数据下载 -## PASCAL VOC 2012数据集 -下载 PASCAL VOC 2012数据集并将分割部分的假彩色标注图(`SegmentationClass`文件夹)转换成灰度图并存储在`SegmentationClassAug`文件夹, 并在文件夹`ImageSets/Segmentation`下重新生成列表文件`train.list、val.list和trainval.list。 - -```shell -# 下载数据集并进行解压转换 -python download_and_convert_voc2012.py -``` - -如果已经下载好PASCAL VOC 2012数据集,将数据集移至dataset目录后使用下述命令直接进行转换即可。 - -```shell -# 数据集转换 -python convert_voc2012.py -``` - -## Oxford-IIIT Pet数据集 -我们使用了Oxford-IIIT中的猫和狗两个类别数据制作了一个小数据集mini_pet,更多关于数据集的介绍请参考[Oxford-IIIT Pet](https://www.robots.ox.ac.uk/~vgg/data/pets/)。 - -```shell -# 下载数据集并进行解压 -python dataset/download_pet.py -``` - -## Cityscapes数据集 -运行下述命令下载并解压Cityscapes数据集。 - -```shell -# 下载数据集并进行解压 -python dataset/download_cityscapes.py -``` diff --git a/legacy/dataset/convert_voc2012.py b/legacy/dataset/convert_voc2012.py deleted file mode 100644 index cce8aef601..0000000000 --- a/legacy/dataset/convert_voc2012.py +++ /dev/null @@ -1,76 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
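# get_palette() in the palette.py hunk above spreads the bits of each class
# index across the three color channels, which reproduces the standard
# PASCAL VOC color map. For instance, the first four entries come out as:
#
#   get_palette(4)[:12] == [0, 0, 0,        # class 0: black
#                           128, 0, 0,      # class 1: dark red
#                           0, 128, 0,      # class 2: dark green
#                           128, 128, 0]    # class 3: dark yellow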
-# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -import numpy as np -import os -from PIL import Image -import glob - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) - - -def remove_colormap(filename): - gray_anno = np.array(Image.open(filename)) - return gray_anno - - -def save_annotation(annotation, filename): - annotation = annotation.astype(dtype=np.uint8) - annotation = Image.fromarray(annotation) - annotation.save(filename) - - -def convert_list(origin_file, seg_file, output_folder): - with open(seg_file, 'w') as fid_seg: - with open(origin_file) as fid_ori: - lines = fid_ori.readlines() - for line in lines: - line = line.strip() - line = '.'.join([line, 'jpg']) - img_name = os.path.join("JPEGImages", line) - line = line.replace('jpg', 'png') - anno_name = os.path.join(output_folder.split(os.sep)[-1], line) - new_line = ' '.join([img_name, anno_name]) - fid_seg.write(new_line + "\n") - - -if __name__ == "__main__": - pascal_root = "./VOCtrainval_11-May-2012/VOC2012" - pascal_root = os.path.join(LOCAL_PATH, pascal_root) - seg_folder = os.path.join(pascal_root, "SegmentationClass") - txt_folder = os.path.join(pascal_root, "ImageSets/Segmentation") - train_path = os.path.join(txt_folder, "train.txt") - val_path = os.path.join(txt_folder, "val.txt") - trainval_path = os.path.join(txt_folder, "trainval.txt") - - # 标注图转换后存储目录 - output_folder = os.path.join(pascal_root, "SegmentationClassAug") - - print("annotation convert and file list convert") - if not os.path.exists(os.path.join(LOCAL_PATH, output_folder)): - os.mkdir(os.path.join(LOCAL_PATH, output_folder)) - annotation_names = glob.glob(os.path.join(seg_folder, '*.png')) - for annotation_name in annotation_names: - annotation = remove_colormap(annotation_name) - filename = os.path.basename(annotation_name) - save_name = os.path.join(output_folder, filename) - save_annotation(annotation, save_name) - - convert_list(train_path, train_path.replace('txt', 'list'), output_folder) - convert_list(val_path, val_path.replace('txt', 'list'), output_folder) - convert_list(trainval_path, trainval_path.replace('txt', 'list'), - output_folder) diff --git a/legacy/dataset/download_and_convert_voc2012.py b/legacy/dataset/download_and_convert_voc2012.py deleted file mode 100644 index fd71f54e4d..0000000000 --- a/legacy/dataset/download_and_convert_voc2012.py +++ /dev/null @@ -1,66 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os -import numpy as np -import os -import glob - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress -from convert_voc2012 import convert_list -from convert_voc2012 import remove_colormap -from convert_voc2012 import save_annotation - - -def download_VOC_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/VOCtrainval_11-May-2012.tar" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_VOC_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") - - pascal_root = "./VOCtrainval_11-May-2012/VOC2012" - pascal_root = os.path.join(LOCAL_PATH, pascal_root) - seg_folder = os.path.join(pascal_root, "SegmentationClass") - txt_folder = os.path.join(pascal_root, "ImageSets/Segmentation") - train_path = os.path.join(txt_folder, "train.txt") - val_path = os.path.join(txt_folder, "val.txt") - trainval_path = os.path.join(txt_folder, "trainval.txt") - - # 标注图转换后存储目录 - output_folder = os.path.join(pascal_root, "SegmentationClassAug") - - print("annotation convert and file list convert") - if not os.path.exists(output_folder): - os.mkdir(output_folder) - annotation_names = glob.glob(os.path.join(seg_folder, '*.png')) - for annotation_name in annotation_names: - annotation = remove_colormap(annotation_name) - filename = os.path.basename(annotation_name) - save_name = os.path.join(output_folder, filename) - save_annotation(annotation, save_name) - - convert_list(train_path, train_path.replace('txt', 'list'), output_folder) - convert_list(val_path, val_path.replace('txt', 'list'), output_folder) - convert_list(trainval_path, trainval_path.replace('txt', 'list'), - output_folder) diff --git a/legacy/dataset/download_cityscapes.py b/legacy/dataset/download_cityscapes.py deleted file mode 100644 index df88c4d17a..0000000000 --- a/legacy/dataset/download_cityscapes.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_cityscapes_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_cityscapes_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/dataset/download_mini_deepglobe_road_extraction.py b/legacy/dataset/download_mini_deepglobe_road_extraction.py deleted file mode 100644 index 6efbba37fb..0000000000 --- a/legacy/dataset/download_mini_deepglobe_road_extraction.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_deepglobe_road_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_deepglobe_road_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/dataset/download_optic.py b/legacy/dataset/download_optic.py deleted file mode 100644 index d71e743c6c..0000000000 --- a/legacy/dataset/download_optic.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_pet_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/optic_disc_seg.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_pet_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/dataset/download_pet.py b/legacy/dataset/download_pet.py deleted file mode 100644 index 7214d5b387..0000000000 --- a/legacy/dataset/download_pet.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding: utf8 -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os - -LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) -TEST_PATH = os.path.join(LOCAL_PATH, "..", "test") -sys.path.append(TEST_PATH) - -from test_utils import download_file_and_uncompress - - -def download_pet_dataset(savepath, extrapath): - url = "https://paddleseg.bj.bcebos.com/dataset/mini_pet.zip" - download_file_and_uncompress( - url=url, savepath=savepath, extrapath=extrapath) - - -if __name__ == "__main__": - download_pet_dataset(LOCAL_PATH, LOCAL_PATH) - print("Dataset download finish!") diff --git a/legacy/deploy/README.md b/legacy/deploy/README.md deleted file mode 100644 index e134c3fcd0..0000000000 --- a/legacy/deploy/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# PaddleSeg 预测部署 - -`PaddleSeg`目前支持使用`Python`和`C++`部署在`Windows` 和`Linux` 上, 也可以集成`PaddleServing`服务化部署在 `Linux` 上。 - -[1. Python预测(支持 Linux 和 Windows)](./python/) - -[2. C++预测(支持 Linux 和 Windows)](./cpp/) - -[3. 服务化部署(仅支持 Linux)](./serving) - -[4. 移动端部署(仅支持Android)](./lite) - -[5. 使用PaddleServing部署](./paddle-serving) diff --git a/legacy/deploy/cpp/CMakeLists.txt b/legacy/deploy/cpp/CMakeLists.txt deleted file mode 100644 index 3cbb0e1187..0000000000 --- a/legacy/deploy/cpp/CMakeLists.txt +++ /dev/null @@ -1,242 +0,0 @@ -cmake_minimum_required(VERSION 3.0) -project(cpp_inference_demo CXX C) - -option(WITH_MKL "Compile demo with MKL/OpenBlas support,defaultuseMKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) -option(USE_TENSORRT "Compile demo with TensorRT." 
OFF) - -SET(PADDLE_DIR "" CACHE PATH "Location of libraries") -SET(OPENCV_DIR "" CACHE PATH "Location of libraries") -SET(CUDA_LIB "" CACHE PATH "Location of libraries") - - -include(external-cmake/yaml-cpp.cmake) - -macro(safe_set_static_flag) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) -endmacro() - -if (WITH_MKL) - ADD_DEFINITIONS(-DUSE_MKL) -endif() - -if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "") - message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir") -endif() - -if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "") - message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv") -endif() - -include_directories("${CMAKE_SOURCE_DIR}/") -include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include") -include_directories("${PADDLE_DIR}/") -include_directories("${PADDLE_DIR}/third_party/install/protobuf/include") -include_directories("${PADDLE_DIR}/third_party/install/glog/include") -include_directories("${PADDLE_DIR}/third_party/install/gflags/include") -include_directories("${PADDLE_DIR}/third_party/install/xxhash/include") -if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include") - include_directories("${PADDLE_DIR}/third_party/install/snappy/include") -endif() -if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") - include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") -endif() - -if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - link_directories("${PADDLE_DIR}/third_party/install/snappy/lib") -endif() -if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") -endif() - -link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") -link_directories("${PADDLE_DIR}/third_party/install/glog/lib") -link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") -link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib") -link_directories("${PADDLE_DIR}/paddle/lib/") -link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib") -link_directories("${CMAKE_CURRENT_BINARY_DIR}") -if (WIN32) - include_directories("${PADDLE_DIR}/paddle/fluid/inference") - include_directories("${PADDLE_DIR}/paddle/include") - link_directories("${PADDLE_DIR}/paddle/fluid/inference") - include_directories("${OPENCV_DIR}/build/include") - include_directories("${OPENCV_DIR}/opencv/build/include") - link_directories("${OPENCV_DIR}/build/x64/vc14/lib") -else () - include_directories("${PADDLE_DIR}/paddle/include") - link_directories("${PADDLE_DIR}/paddle/lib") - include_directories("${OPENCV_DIR}/include") - link_directories("${OPENCV_DIR}/lib64") -endif () - -if (WIN32) - add_definitions("/DGOOGLE_GLOG_DLL_DECL=") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") - if (WITH_STATIC_LIB) - safe_set_static_flag() - add_definitions(-DSTATIC_LIB) - endif() -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -fopenmp -std=c++11") - set(CMAKE_STATIC_LIBRARY_PREFIX "") -endif() - -# TODO let users define cuda lib 
path -if (WITH_GPU) - if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "") - message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64") - endif() - if (NOT WIN32) - if (NOT DEFINED CUDNN_LIB) - message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64") - endif() - endif(NOT WIN32) -endif() - - -if (NOT WIN32) - if (USE_TENSORRT AND WITH_GPU) - include_directories("${PADDLE_DIR}/third_party/install/tensorrt/include") - link_directories("${PADDLE_DIR}/third_party/install/tensorrt/lib") - endif() -endif(NOT WIN32) - -if (NOT WIN32) - set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph") - if(EXISTS ${NGRAPH_PATH}) - include(GNUInstallDirs) - include_directories("${NGRAPH_PATH}/include") - link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}") - set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif() -endif() - -if(WITH_MKL) - include_directories("${PADDLE_DIR}/third_party/install/mklml/include") - if (WIN32) - set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib - ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib) - else () - set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} - ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) - execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib) - endif () - set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn") - if(EXISTS ${MKLDNN_PATH}) - include_directories("${MKLDNN_PATH}/include") - if (WIN32) - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) - else () - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) - endif () - endif() -else() - set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) -endif() - -if (WIN32) - if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(DEPS - ${PADDLE_DIR}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) - else() - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() -endif() - -if(WITH_STATIC_LIB) - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}) -else() - set(DEPS - ${PADDLE_DIR}/paddle/lib/libpaddle_fluid${CMAKE_SHARED_LIBRARY_SUFFIX}) -endif() - -if (NOT WIN32) - set(EXTERNAL_LIB "-lrt -ldl -lpthread") - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags protobuf yaml-cpp xxhash - ${EXTERNAL_LIB}) - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - set(DEPS ${DEPS} snappystream) - endif() - if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - set(DEPS ${DEPS} snappy) - endif() -else() - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - opencv_world346 glog libyaml-cppmt gflags_static libprotobuf xxhash ${EXTERNAL_LIB}) - set(DEPS ${DEPS} libcmt shlwapi) - if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") - set(DEPS ${DEPS} snappy) - endif() - if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") - set(DEPS ${DEPS} snappystream) - endif() -endif(NOT WIN32) - -if(WITH_GPU) - if(NOT WIN32) - if (USE_TENSORRT) - set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) - endif() - set(DEPS ${DEPS} 
${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) - else() - set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() -endif() - -if (NOT WIN32) - set(DEPS ${DEPS} ${OPENCV_DIR}/lib64/libopencv_imgcodecs${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/lib64/libopencv_imgproc${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/lib64/libopencv_core${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/lib64/libopencv_highgui${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/libIlmImf${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/liblibjasper${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/liblibpng${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/liblibtiff${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/libittnotify${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/liblibjpeg-turbo${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/liblibwebp${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${OPENCV_DIR}/share/OpenCV/3rdparty/lib64/libzlib${CMAKE_STATIC_LIBRARY_SUFFIX}) -endif() - - -SET(PADDLESEG_INFERENCE_SRCS preprocessor/preprocessor.cpp preprocessor/preprocessor_seg.cpp predictor/seg_predictor.cpp) -ADD_LIBRARY(libpaddleseg_inference STATIC ${PADDLESEG_INFERENCE_SRCS}) -target_link_libraries(libpaddleseg_inference ${DEPS}) - -add_executable(demo demo.cpp) -ADD_DEPENDENCIES(libpaddleseg_inference ext-yaml-cpp) -ADD_DEPENDENCIES(demo ext-yaml-cpp libpaddleseg_inference) -target_link_libraries(demo ${DEPS} libpaddleseg_inference) - -if (WIN32) - add_custom_command(TARGET demo POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll - ) -endif() - -execute_process(COMMAND cp -r ${CMAKE_SOURCE_DIR}/images ${CMAKE_SOURCE_DIR}/conf ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/legacy/deploy/cpp/CMakeSettings.json b/legacy/deploy/cpp/CMakeSettings.json deleted file mode 100644 index e664ee6f4d..0000000000 --- a/legacy/deploy/cpp/CMakeSettings.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "configurations": [ - { - "name": "x64-Release", - "generator": "Ninja", - "configurationType": "RelWithDebInfo", - "inheritEnvironments": [ "msvc_x64_x64" ], - "buildRoot": "${projectDir}\\out\\build\\${name}", - "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "", - "buildCommandArgs": "-v", - "ctestCommandArgs": "", - "variables": [ - { - "name": 
"CUDA_LIB", - "value": "C:/PaddleDeploy/cudalib/v8.0/lib/x64", - "type": "PATH" - }, - { - "name": "OPENCV_DIR", - "value": "C:/PaddleDeploy/opencv", - "type": "PATH" - }, - { - "name": "PADDLE_DIR", - "value": "C:/PaddleDeploy/fluid_inference", - "type": "PATH" - }, - { - "name": "CMAKE_BUILD_TYPE", - "value": "Release", - "type": "STRING" - } - ] - } - ] -} diff --git a/legacy/deploy/cpp/INSTALL.md b/legacy/deploy/cpp/INSTALL.md deleted file mode 100644 index d1c67ece4e..0000000000 --- a/legacy/deploy/cpp/INSTALL.md +++ /dev/null @@ -1,31 +0,0 @@ -# 依赖安装 - -## OpenCV -OpenCV官方Release地址:https://opencv.org/releases/ - -### Windows - -1. 下载Windows安装包:OpenCV-3.4.6 -2. 双击安装到指定位置,如D:\opencv -3. 配置环境变量 -> 1.我的电脑->属性->高级系统设置->环境变量 -> 2.在系统变量中找到Path(如没有,自行创建),并双击编辑 -> 3.新建,将opencv路径填入并保存,如D:\opencv\build\x64\vc14\bin - -### Linux -1. 下载OpenCV-3.4.6 Sources,并解压,如/home/user/opencv-3.4.6 -2. cd opencv-3.4.6 & mkdir build & mkdir release -3. 修改modules/videoio/src/cap_v4l.cpp 在代码第253行下,插入如下代码 -``` -#ifndef V4L2_CID_ROTATE -#define V4L2_CID_ROTATE (V4L2_CID_BASE+34) -#endif -#ifndef V4L2_CID_IRIS_ABSOLUTE -#define V4L2_CID_IRIS_ABSOLUTE (V4L2_CID_CAMERA_CLASS_BASE+17) -#endif -``` -3. cd build -4. cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/ssd2/Jason/tmp/opencv-3.4.6/release/ --OPENCV_FORCE_3RDPARTY_BUILD=OFF -5. make -j10 -6. make install -编译后产出的头文件和lib即安装在/home/user/opencv-3.4.6/release目录下 diff --git a/legacy/deploy/cpp/README.md b/legacy/deploy/cpp/README.md deleted file mode 100644 index abea0f9ad3..0000000000 --- a/legacy/deploy/cpp/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# PaddleSeg 预测部署方案 - - -[1.说明](#1说明) - -[2.主要目录和文件](#2主要目录和文件) - -[3.编译](#3编译) - -[4.预测并可视化结果](#4预测并可视化结果) - -## 1.说明 - -本目录提供一个跨平台`PaddlePaddle`图像分割模型的`C++`预测部署方案,用户通过一定的配置,加上少量的代码,即可把模型集成到自己的服务中,完成图像分割的任务。 - -主要设计的目标包括以下四点: -- 跨平台,支持在 windows 和 Linux 完成编译、开发和部署 -- 支持主流图像分割任务,用户通过少量配置即可加载模型完成常见预测任务,比如人像分割等 -- 可扩展性,支持用户针对新模型开发自己特殊的数据预处理、后处理等逻辑 -- 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像分割的特点对关键步骤进行了性能优化 - -**注意** 如需要使用`Python`的预测部署方法,请参考:[Python预测部署](../python/) - - -## 2.主要目录和文件 - -``` -cpp -├── demo.cpp # 演示加载模型、读入数据、完成预测任务C++代码 -| -├── conf -│ └── humanseg.yaml # 示例人像分割模型配置 -├── images -│ └── humanseg # 示例人像分割模型测试图片目录 -├── tools -│ └── visualize.py # 示例分割模型结果可视化脚本 -├── docs -| ├── linux_build.md # Linux 编译指南 -| ├── windows_vs2015_build.md # windows VS2015编译指南 -│ └── windows_vs2019_build.md # Windows VS2019编译指南 -| -├── utils # 一些基础公共函数 -| -├── preprocess # 数据预处理相关代码 -| -├── predictor # 模型加载和预测相关代码 -| -├── CMakeList.txt # cmake编译入口文件 -| -└── external-cmake # 依赖的外部项目cmake(目前仅有yaml-cpp) - -``` - -## 3.编译 -支持在`Windows`和`Linux`平台编译和使用: -- [Linux 编译指南](./docs/linux_build.md) -- [Windows 使用 Visual Studio 2019 Community 编译指南](./docs/windows_vs2019_build.md) -- [Windows 使用 Visual Studio 2015 编译指南](./docs/windows_vs2015_build.md) - -`Windows`上推荐使用最新的`Visual Studio 2019 Community`直接编译`CMake`项目。 - -## 4.预测并可视化结果 - -完成编译后,便生成了需要的可执行文件和链接库,然后执行以下步骤: - -### 4.1. 下载模型文件 -我们提供了一个人像分割模型示例用于测试,点击右侧地址下载:[示例模型下载地址](https://paddleseg.bj.bcebos.com/inference_model/deeplabv3p_xception65_humanseg.tgz) - -下载并解压,解压后目录结构如下: -``` -deeplabv3p_xception65_humanseg -├── __model__ # 模型文件 -| -└── __params__ # 参数文件 -``` -解压后把上述目录拷贝到合适的路径: - -**假设**`Windows`系统上,我们模型和参数文件所在路径为`D:\projects\models\deeplabv3p_xception65_humanseg`。 - -**假设**`Linux`上对应的路径则为`/root/projects/models/deeplabv3p_xception65_humanseg`。 - - -### 4.2. 
修改配置 - -基于`PaddleSeg`训练的模型导出时,会自动生成对应的预测模型配置文件,请参考文档:[模型导出](../../docs/model_export.md)。 - -`inference`源代码(即本目录)的`conf`目录下提供了示例人像分割模型的配置文件`humanseg.yaml`, 相关的字段含义和说明如下: - -```yaml -DEPLOY: - # 是否使用GPU预测 - USE_GPU: 1 - # 是否是PaddleSeg 0.3.0新版本模型 - USE_PR : 1 - # 模型和参数文件所在目录路径 - MODEL_PATH: "/root/projects/models/deeplabv3p_xception65_humanseg" - # 模型文件名 - MODEL_FILENAME: "__model__" - # 参数文件名 - PARAMS_FILENAME: "__params__" - # 预测图片的的标准输入尺寸,输入尺寸不一致会做resize - EVAL_CROP_SIZE: (513, 513) - # 均值 - MEAN: [0.40787450980392154, 0.4575254901960784, 0.481078431372549] - # 方差 - STD: [0.00392156862745098, 0.00392156862745098, 0.00392156862745098] - # 图片类型, rgb 或者 rgba - IMAGE_TYPE: "rgb" - # 分类类型数 - NUM_CLASSES: 2 - # 图片通道数 - CHANNELS : 3 - # 预处理方式,目前提供图像分割的通用处理类SegPreProcessor - PRE_PROCESSOR: "SegPreProcessor" - # 预测模式,支持 NATIVE 和 ANALYSIS - PREDICTOR_MODE: "ANALYSIS" - # 每次预测的 batch_size - BATCH_SIZE : 3 -``` -修改字段`MODEL_PATH`的值为你在**上一步**下载并解压的模型文件所放置的目录即可。 - -**注意**在使用CPU版本预测库时,`USE_GPU`的值必须设为0,否则无法正常预测。 - -### 4.3. 执行预测 - -在终端中切换到生成的可执行文件所在目录为当前目录(Windows系统为`cmd`)。 - -`Linux` 系统中执行以下命令: -```shell -./demo --conf=/root/projects/PaddleSeg/deploy/cpp/conf/humanseg.yaml --input_dir=/root/projects/PaddleSeg/deploy/cpp/images/humanseg/ -``` -`Windows` 中执行以下命令: -```shell -D:\projects\PaddleSeg\deploy\cpp\build\Release>demo.exe --conf=D:\\projects\\PaddleSeg\\deploy\\cpp\\conf\\humanseg.yaml --input_dir=D:\\projects\\PaddleSeg\\deploy\\cpp\\images\humanseg\\ -``` - - -预测使用的两个命令参数说明如下: - -| 参数 | 含义 | -|-------|----------| -| conf | 模型配置的Yaml文件路径 | -| input_dir | 需要预测的图片目录 | - - -配置文件说明请参考上一步,样例程序会扫描input_dir目录下的所有以**jpg或jpeg**为后缀的图片,并生成对应的预测结果(若input_dir目录下没有以**jpg或jpeg**为后缀的图片,程序会报错)。图像分割会对`demo.jpg`的每个像素进行分类,其预测的结果保存在`demo_jpg_mask.png`中。分割预测结果的图不能直接看到效果,必须经过可视化处理。对于二分类的图像分割模型。如果需要对预测结果进行**可视化**,请参考[可视化脚本使用方法](./docs/vis.md)。 - -输入原图 -![avatar](images/humanseg/demo2.jpeg) - -输出预测结果 -![avatar](images/humanseg/demo2_jpeg_recover.png) diff --git a/legacy/deploy/cpp/conf/humanseg.yaml b/legacy/deploy/cpp/conf/humanseg.yaml deleted file mode 100644 index 4b09e18dc7..0000000000 --- a/legacy/deploy/cpp/conf/humanseg.yaml +++ /dev/null @@ -1,14 +0,0 @@ -DEPLOY: - USE_GPU: 1 - MODEL_PATH: "/root/projects/models/deeplabv3p_xception65_humanseg" - MODEL_FILENAME: "__model__" - PARAMS_FILENAME: "__params__" - EVAL_CROP_SIZE: (513, 513) - MEAN: [0.40787450980392154, 0.4575254901960784, 0.481078431372549] - STD: [0.00392156862745098, 0.00392156862745098, 0.00392156862745098] - IMAGE_TYPE: "rgb" - NUM_CLASSES: 2 - CHANNELS : 3 - PRE_PROCESSOR: "SegPreProcessor" - PREDICTOR_MODE: "NATIVE" - BATCH_SIZE : 1 diff --git a/legacy/deploy/cpp/demo.cpp b/legacy/deploy/cpp/demo.cpp deleted file mode 100644 index 729b36dde0..0000000000 --- a/legacy/deploy/cpp/demo.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include -#include -#include - -DEFINE_string(conf, "", "Configuration File Path"); -DEFINE_string(input_dir, "", "Directory of Input Images"); - -int main(int argc, char** argv) { - // 0. parse args - google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_conf.empty() || FLAGS_input_dir.empty()) { - std::cout << "Usage: ./predictor --conf=/config/path/to/your/model " - << "--input_dir=/directory/of/your/input/images" << std::endl; - return -1; - } - // 1. create a predictor and init it with conf - PaddleSolution::Predictor predictor; - if (predictor.init(FLAGS_conf) != 0) { - LOG(FATAL) << "Fail to init predictor"; - return -1; - } - - // 2. get all the images with extension '.jpeg' at input_dir - auto imgs = PaddleSolution::utils::get_directory_images(FLAGS_input_dir, - ".jpeg|.jpg"); - // 3. predict - predictor.predict(imgs); - return 0; -} diff --git a/legacy/deploy/cpp/docs/demo.jpg b/legacy/deploy/cpp/docs/demo.jpg deleted file mode 100644 index 8e5f2157e2..0000000000 Binary files a/legacy/deploy/cpp/docs/demo.jpg and /dev/null differ diff --git a/legacy/deploy/cpp/docs/demo_jpg.png b/legacy/deploy/cpp/docs/demo_jpg.png deleted file mode 100644 index cc702a6900..0000000000 Binary files a/legacy/deploy/cpp/docs/demo_jpg.png and /dev/null differ diff --git a/legacy/deploy/cpp/docs/linux_build.md b/legacy/deploy/cpp/docs/linux_build.md deleted file mode 100644 index b294eb698e..0000000000 --- a/legacy/deploy/cpp/docs/linux_build.md +++ /dev/null @@ -1,101 +0,0 @@ -# Linux平台 编译指南 - -## 说明 -本文档在 `Linux`平台使用`GCC 4.8.5` 和 `GCC 4.9.4`测试过,如果需要使用更高版本的GCC编译使用,则需要重新编译PaddlePaddle预测库,请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html#id15)。 - -## 前置条件 -* G++ 4.8.2 ~ 4.9.4 -* CMake 3.0+ -* CUDA 9.0 / CUDA 10.0, cudnn 7+ (仅在使用GPU版本的预测库时需要) -* CentOS 7.6, Ubuntu 16.04, Ubuntu 18.04 (均在以上系统验证过) - -请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**。 - -### Step1: 下载代码 - -1. `mkdir -p /root/projects/ && cd /root/projects` -2. `git clone https://github.com/PaddlePaddle/PaddleSeg.git` - -`C++`预测代码在`/root/projects/PaddleSeg/deploy/cpp` 目录,该目录不依赖任何`PaddleSeg`下其他目录。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -PaddlePaddle C++ 预测库主要分为CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为两个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。以下为各版本C++预测库的下载链接: - -| 版本 | 链接 | -| ---- | ---- | -| CPU版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.6.1-cpu-avx-mkl/fluid_inference.tgz) | -| CUDA 9.0版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.6.1-gpu-cuda9-cudnn7-avx-mkl/fluid_inference.tgz) | -| CUDA 10.0版本 | [fluid_inference.tgz](https://paddle-inference-lib.bj.bcebos.com/1.6.1-gpu-cuda10-cudnn7-avx-mkl/fluid_inference.tgz) | - - -针对不同的CPU类型、不同的指令集,官方提供更多可用的预测库版本,目前已经推出1.6版本的预测库。其余版本具体请参考以下链接:[C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/build_and_install_lib_cn.html) - - -下载并解压后`/root/projects/fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` - -### Step3: 安装配置OpenCV - -```shell -# 0. 切换到/root/projects目录 -cd /root/projects -# 1. 下载OpenCV3.4.6版本源代码 -wget -c https://paddleseg.bj.bcebos.com/inference/opencv-3.4.6.zip -# 2. 解压 -unzip opencv-3.4.6.zip && cd opencv-3.4.6 -# 3. 创建build目录并编译, 这里安装到/usr/local/opencv3目录 -mkdir build && cd build -cmake .. 
-DCMAKE_INSTALL_PREFIX=/root/projects/opencv3 -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_IPP=OFF -DBUILD_IPP_IW=OFF -DWITH_LAPACK=OFF -DWITH_EIGEN=OFF -DCMAKE_INSTALL_LIBDIR=lib64 -DWITH_ZLIB=ON -DBUILD_ZLIB=ON -DWITH_JPEG=ON -DBUILD_JPEG=ON -DWITH_PNG=ON -DBUILD_PNG=ON -DWITH_TIFF=ON -DBUILD_TIFF=ON -make -j4 -make install -``` - -**注意:** 上述操作完成后,`opencv` 被安装在 `/root/projects/opencv3` 目录。 - -### Step4: 编译 - -`CMake`编译时,涉及到四个编译参数用于指定核心依赖库的路径, 他们的定义如下:(带*表示仅在使用**GPU版本**预测库时指定,其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1版本CUDA库**) - -| 参数名 | 含义 | -| ---- | ---- | -| * CUDA_LIB | CUDA的库路径 | -| * CUDNN_LIB | cudnn的库路径| -| OPENCV_DIR | OpenCV的安装路径 | -| PADDLE_DIR | Paddle预测库的路径 | - -在使用**GPU版本**预测库进行编译时,可执行下列操作。**注意**把对应的参数改为你的上述依赖库实际路径: - -```shell -cd /root/projects/PaddleSeg/deploy/cpp -mkdir build && cd build -cmake .. -DWITH_GPU=ON -DPADDLE_DIR=/root/projects/fluid_inference -DCUDA_LIB=/usr/local/cuda/lib64/ -DOPENCV_DIR=/root/projects/opencv3/ -DCUDNN_LIB=/usr/local/cuda/lib64/ -DWITH_STATIC_LIB=OFF -make -``` - -在使用**CPU版本**预测库进行编译时,可执行下列操作。 -```shell -cd /root/projects/PaddleSeg/cpp - -mkdir build && cd build -cmake .. -DWITH_GPU=OFF -DPADDLE_DIR=/root/projects/fluid_inference -DOPENCV_DIR=/root/projects/opencv3/ -DWITH_STATIC_LIB=OFF -make -``` - -### Step5: 预测及可视化 - -执行命令: - -``` -./demo --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考README文档: [预测和可视化部分](../README.md) diff --git a/legacy/deploy/cpp/docs/vis.md b/legacy/deploy/cpp/docs/vis.md deleted file mode 100644 index 9143979994..0000000000 --- a/legacy/deploy/cpp/docs/vis.md +++ /dev/null @@ -1,38 +0,0 @@ -# 图像分割结果可视化说明 - -本文档介绍如何使用可视化脚本对图像分割结果进行可视化处理。**注意:可视化脚本处理仅针对PaddleSeg C++预测部署方案生成的预测结果。** - -## 说明 -图像分割模型会对预测的图像的每个像素进行分类,所以图像分割模型的预测结果是图像里每个像素的标签,我们将预测结果以图片格式保存。例如预测图片`demo.jpg`,其预测的结果以图片格式保存在`demo_jpg.png`中。保存分割预测结果的图片并不能直接看到效果(一张全黑的图),必须经过可视化处理。以下为使用方法。 - -```bash -# 假设当前路径为PaddleSeg根目录 -# 切换到脚本所在目录 -cd inference/tools/ -# 拷贝保存分割预测结果的图片到本目录 -cp XXX/demo_jpg.png . -# 运行可视化脚本 -python visualize.py demo.jpg demo_jpg_mask.png vis_result.png -``` - -以下为上述运行可视化脚本例子中每个参数的含义,请根据测试机器中图片的**实际路径**修改对应参数。 - -| 参数 | 含义 | -|-------|----------| -| demo.jpg | 原图路径 | -| demo_jpg.png | 保存预测结果的图片的路径 | -| vis_result.png| 可视化后的效果图路径| - - -**可视化结果展示:** - -以下以cityscape模型的预测结果进行展示。 - -原图(`demo.jpg`): -![avatar](demo.jpg) - -预测结果图(`demo_jpg.png`, 仅用来保存预测的结果,需经过可视化处理): -![avatar](demo_jpg.png) - -效果图(`vis_result.png`): -![avatar](vis_result.png) diff --git a/legacy/deploy/cpp/docs/vis_result.png b/legacy/deploy/cpp/docs/vis_result.png deleted file mode 100644 index 0b342a3d4d..0000000000 Binary files a/legacy/deploy/cpp/docs/vis_result.png and /dev/null differ diff --git a/legacy/deploy/cpp/docs/windows_vs2015_build.md b/legacy/deploy/cpp/docs/windows_vs2015_build.md deleted file mode 100644 index 4232407774..0000000000 --- a/legacy/deploy/cpp/docs/windows_vs2015_build.md +++ /dev/null @@ -1,116 +0,0 @@ -# Windows平台使用 Visual Studio 2015 编译指南 - -本文档步骤,我们同时在`Visual Studio 2015` 和 `Visual Studio 2019 Community` 两个版本进行了测试,我们推荐使用[`Visual Studio 2019`直接编译`CMake`项目](./windows_vs2019_build.md)。 - - -## 前置条件 -* Visual Studio 2015 -* CUDA 9.0 / CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) -* CMake 3.0+ - -请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `D:\projects`演示**。 - -### Step1: 下载代码 - -1. 打开`cmd`, 执行 `cd /d D:\projects` -2. 
`git clone http://gitlab.baidu.com/Paddle/PaddleSeg.git` - -`C++`预测库代码在`D:\projects\PaddleSeg\deploy\cpp` 目录,该目录不依赖任何`PaddleSeg`下其他目录。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为两个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库的下载链接: - -| 版本 | 链接 | -| ---- | ---- | -| CPU版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.2/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | -| CUDA 9.0版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.2/win-infer/mkl/post97/fluid_inference_install_dir.zip) | -| CUDA 10.0版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.2/win-infer/mkl/post107/fluid_inference_install_dir.zip) | - -解压后`D:\projects\fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` - -### Step3: 安装配置OpenCV - -1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) -2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 - - 我的电脑->属性->高级系统设置->环境变量 - - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` - -### Step4: 以VS2015为例编译代码 - -以下命令需根据自己系统中各相关依赖的路径进行修改 - -* 调用VS2015, 请根据实际VS安装路径进行调整,打开cmd命令行工具执行以下命令 -* 其他vs版本(比如vs2019),请查找到对应版本的`vcvarsall.bat`路径,替换本命令即可 - -``` -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -``` - -三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): - -| 参数名 | 含义 | -| ---- | ---- | -| *CUDA_LIB | CUDA的库路径 | -| OPENCV_DIR | OpenCV的安装路径 | -| PADDLE_DIR | Paddle预测库的路径 | - -在使用**GPU版本**预测库进行编译时,可执行下列操作。**注意**把对应的参数改为你的上述依赖库实际路径: - -```bash -# 切换到预测库所在目录 -cd /d D:\projects\PaddleSeg\deploy\cpp\ -# 创建构建目录, 重新构建只需要删除该目录即可 -mkdir build -cd build -# cmake构建VS项目 -D:\projects\PaddleSeg\deploy\cpp\build> cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DPADDLE_DIR=D:\projects\fluid_inference -DCUDA_LIB=D:\projects\cudalib\v9.0\lib\x64 -DOPENCV_DIR=D:\projects\opencv -T host=x64 -``` - -在使用**CPU版本**预测库进行编译时,可执行下列操作。 - -```bash -# 切换到预测库所在目录 -cd /d D:\projects\PaddleSeg\deploy\cpp\ -# 创建构建目录, 重新构建只需要删除该目录即可 -mkdir build -cd build -# cmake构建VS项目 -D:\projects\PaddleSeg\deploy\cpp\build> cmake .. 
-G "Visual Studio 14 2015 Win64" -DWITH_GPU=OFF -DPADDLE_DIR=D:\projects\fluid_inference -DOPENCV_DIR=D:\projects\opencv -T host=x64 -``` - -这里的`cmake`参数`-G`, 表示生成对应的VS版本的工程,可以根据自己的`VS`版本调整,具体请参考[cmake文档](https://cmake.org/cmake/help/v3.15/manual/cmake-generators.7.html) - -* 生成可执行文件 - -``` -D:\projects\PaddleSeg\deploy\cpp\build> msbuild /m /p:Configuration=Release cpp_inference_demo.sln -``` - -### Step5: 预测及可视化 - -上述`Visual Studio 2015`编译产出的可执行文件在`build\release`目录下,切换到该目录: -``` -cd /d D:\projects\PaddleSeg\deploy\cpp\build\release -``` - -之后执行命令: - -``` -demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考ReadMe文档: [预测和可视化部分](../README.md) - - diff --git a/legacy/deploy/cpp/docs/windows_vs2019_build.md b/legacy/deploy/cpp/docs/windows_vs2019_build.md deleted file mode 100644 index 92ec561dbf..0000000000 --- a/legacy/deploy/cpp/docs/windows_vs2019_build.md +++ /dev/null @@ -1,102 +0,0 @@ -# Visual Studio 2019 Community CMake 编译指南 - -Windows 平台下,我们使用`Visual Studio 2015` 和 `Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。 - -你也可以使用和`VS2015`一样,通过把`CMake`项目转化成`VS`项目来编译,其中**有差别的部分**在文档中我们有说明,请参考:[使用Visual Studio 2015 编译指南](./windows_vs2015_build.md) - -## 前置条件 -* Visual Studio 2019 -* CUDA 9.0/ CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) -* CMake 3.0+ - -请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 - -**下面所有示例以工作目录为 `D:\projects`演示**。 - -### Step1: 下载代码 - -1. 点击下载源代码:[下载地址](https://github.com/PaddlePaddle/PaddleSeg/archive/release/v0.3.0.zip) -2. 解压,解压后目录重命名为`PaddleSeg` - -以下代码目录路径为`D:\projects\PaddleSeg` 为例。 - - -### Step2: 下载PaddlePaddle C++ 预测库 fluid_inference - -PaddlePaddle C++ 预测库主要分为两大版本:CPU版本和GPU版本。其中,针对不同的CUDA版本,GPU版本预测库又分为三个版本预测库:CUDA 9.0和CUDA 10.0版本预测库。根据Windows环境,下载相应版本的PaddlePaddle预测库,并解压到`D:\projects\`目录。以下为各版本C++预测库的下载链接: - -| 版本 | 链接 | -| ---- | ---- | -| CPU版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.1/win-infer/mkl/cpu/fluid_inference_install_dir.zip) | -| CUDA 9.0版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.1/win-infer/mkl/post97/fluid_inference_install_dir.zip) | -| CUDA 10.0版本 | [fluid_inference_install_dir.zip](https://paddle-wheel.bj.bcebos.com/1.6.1/win-infer/mkl/post107/fluid_inference_install_dir.zip) | - -解压后`D:\projects\fluid_inference`目录包含内容为: -``` -fluid_inference -├── paddle # paddle核心库和头文件 -| -├── third_party # 第三方依赖库和头文件 -| -└── version.txt # 版本和编译信息 -``` - -### Step3: 安装配置OpenCV - -1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) -2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 - - 我的电脑->属性->高级系统设置->环境变量 - - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` - -### Step4: 使用Visual Studio 2019直接编译CMake - -1. 打开Visual Studio 2019 Community,点击`继续但无需代码` -![step2](https://paddleseg.bj.bcebos.com/inference/vs2019_step1.png) -2. 点击: `文件`->`打开`->`CMake` -![step2.1](https://paddleseg.bj.bcebos.com/inference/vs2019_step2.png) - -选择项目代码所在路径,并打开`CMakeList.txt`: - -![step2.2](https://paddleseg.bj.bcebos.com/inference/vs2019_step3.png) - -3. 点击:`项目`->`cpp_inference_demo的CMake设置` - -![step3](https://paddleseg.bj.bcebos.com/inference/vs2019_step4.png) - -4. 
点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径 - -三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): - -| 参数名 | 含义 | -| ---- | ---- | -| *CUDA_LIB | CUDA的库路径 | -| OPENCV_DIR | OpenCV的安装路径 | -| PADDLE_DIR | Paddle预测库的路径 | - -**注意**在使用CPU版本预测库时,需要把CUDA_LIB的勾去掉。 -![step4](https://paddleseg.bj.bcebos.com/inference/vs2019_step5.png) - -**设置完成后**, 点击上图中`保存并生成CMake缓存以加载变量`。 - -5. 点击`生成`->`全部生成` - -![step6](https://paddleseg.bj.bcebos.com/inference/vs2019_step6.png) - - -### Step5: 预测及可视化 - -上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: - -``` -cd /d D:\projects\PaddleSeg\deploy\cpp\out\build\x64-Release -``` - -之后执行命令: - -``` -demo.exe --conf=/path/to/your/conf --input_dir=/path/to/your/input/data/directory -``` - -更详细说明请参考ReadMe文档: [预测和可视化部分](../README.md) diff --git a/legacy/deploy/cpp/external-cmake/yaml-cpp.cmake b/legacy/deploy/cpp/external-cmake/yaml-cpp.cmake deleted file mode 100644 index 15fa2674e0..0000000000 --- a/legacy/deploy/cpp/external-cmake/yaml-cpp.cmake +++ /dev/null @@ -1,29 +0,0 @@ - -find_package(Git REQUIRED) - -include(ExternalProject) - -message("${CMAKE_BUILD_TYPE}") - -ExternalProject_Add( - ext-yaml-cpp - GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git - GIT_TAG e0e01d53c27ffee6c86153fa41e7f5e57d3e5c90 - CMAKE_ARGS - -DYAML_CPP_BUILD_TESTS=OFF - -DYAML_CPP_BUILD_TOOLS=OFF - -DYAML_CPP_INSTALL=OFF - -DYAML_CPP_BUILD_CONTRIB=OFF - -DMSVC_SHARED_RT=OFF - -DBUILD_SHARED_LIBS=OFF - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib - -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib - PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" - # Disable install step - INSTALL_COMMAND "" - LOG_DOWNLOAD ON -) diff --git a/legacy/deploy/cpp/images/humanseg/demo1.jpeg b/legacy/deploy/cpp/images/humanseg/demo1.jpeg deleted file mode 100644 index de231b52c7..0000000000 Binary files a/legacy/deploy/cpp/images/humanseg/demo1.jpeg and /dev/null differ diff --git a/legacy/deploy/cpp/images/humanseg/demo2.jpeg b/legacy/deploy/cpp/images/humanseg/demo2.jpeg deleted file mode 100644 index c391962310..0000000000 Binary files a/legacy/deploy/cpp/images/humanseg/demo2.jpeg and /dev/null differ diff --git a/legacy/deploy/cpp/images/humanseg/demo2.jpeg_result.png b/legacy/deploy/cpp/images/humanseg/demo2.jpeg_result.png deleted file mode 100644 index cc871a64fd..0000000000 Binary files a/legacy/deploy/cpp/images/humanseg/demo2.jpeg_result.png and /dev/null differ diff --git a/legacy/deploy/cpp/images/humanseg/demo2_jpeg_recover.png b/legacy/deploy/cpp/images/humanseg/demo2_jpeg_recover.png deleted file mode 100644 index 534bbd9443..0000000000 Binary files a/legacy/deploy/cpp/images/humanseg/demo2_jpeg_recover.png and /dev/null differ diff --git a/legacy/deploy/cpp/images/humanseg/demo3.jpeg b/legacy/deploy/cpp/images/humanseg/demo3.jpeg deleted file mode 100644 index c02b837749..0000000000 Binary files a/legacy/deploy/cpp/images/humanseg/demo3.jpeg and /dev/null differ diff --git a/legacy/deploy/cpp/predictor/seg_predictor.cpp b/legacy/deploy/cpp/predictor/seg_predictor.cpp deleted file mode 100644 index aa88558632..0000000000 --- a/legacy/deploy/cpp/predictor/seg_predictor.cpp +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "seg_predictor.h" -#undef min -namespace PaddleSolution { - using std::chrono::duration_cast; - int Predictor::init(const std::string& conf) { - if (!_model_config.load_config(conf)) { - LOG(FATAL) << "Fail to load config file: [" << conf << "]"; - return -1; - } - _preprocessor = PaddleSolution::create_processor(conf); - if (_preprocessor == nullptr) { - LOG(FATAL) << "Failed to create_processor"; - return -1; - } - - int res_size = _model_config._resize[0] * _model_config._resize[1]; - _mask.resize(res_size); - _scoremap.resize(res_size); - - bool use_gpu = _model_config._use_gpu; - const auto& model_dir = _model_config._model_path; - const auto& model_filename = _model_config._model_file_name; - const auto& params_filename = _model_config._param_file_name; - // load paddle model file - if (_model_config._predictor_mode == "NATIVE") { - paddle::NativeConfig config; - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.prog_file = prog_file; - config.param_file = param_file; - config.fraction_of_gpu_memory = 0; - config.use_gpu = use_gpu; - config.device = 0; - _main_predictor = paddle::CreatePaddlePredictor(config); - } else if (_model_config._predictor_mode == "ANALYSIS") { - paddle::AnalysisConfig config; - if (use_gpu) { - config.EnableUseGpu(100, 0); - if (TRT_MAP.find(_model_config._trt_mode) != TRT_MAP.end()) { - auto precision = TRT_MAP[_model_config._trt_mode]; - bool use_cab = (precision == paddle::AnalysisConfig::Precision::kInt8); - config.EnableTensorRtEngine(1 << 30, _model_config._batch_size, 40, - precision, false, use_cab); - } - } - auto prog_file = utils::path_join(model_dir, model_filename); - auto param_file = utils::path_join(model_dir, params_filename); - config.SetModel(prog_file, param_file); - config.SwitchUseFeedFetchOps(false); - config.SwitchSpecifyInputNames(true); - config.EnableMemoryOptim(); - _main_predictor = paddle::CreatePaddlePredictor(config); - } else { - return -1; - } - return 0; - } - - int Predictor::predict(const std::vector& imgs) { - if (_model_config._predictor_mode == "NATIVE") { - return native_predict(imgs); - } else if (_model_config._predictor_mode == "ANALYSIS") { - return analysis_predict(imgs); - } - return -1; - } - - int Predictor::output_mask(const std::string& fname, float* p_out, - int length, int* height, int* width) { - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - int eval_num_class = _model_config._class_num; - - int blob_out_len = length; - int seg_out_len = eval_height * eval_width * eval_num_class; - if (blob_out_len != seg_out_len) { - LOG(ERROR) << " [FATAL] unequal: input vs output [" << - seg_out_len << "|" << blob_out_len << "]" << std::endl; - return -1; - } - // post process - _mask.clear(); - _scoremap.clear(); - std::vector out_shape{eval_num_class, eval_height, eval_width}; - utils::argmax(p_out, out_shape, _mask, 
_scoremap); - cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); - mask_png.data = _mask.data(); - std::string nname(fname); - auto pos = fname.rfind("."); - nname[pos] = '_'; - std::string mask_save_name = nname + "_mask.png"; - cv::imwrite(mask_save_name, mask_png); - cv::Mat scoremap_png = cv::Mat(eval_height, eval_width, CV_8UC1); - scoremap_png.data = _scoremap.data(); - std::string scoremap_save_name = nname + std::string("_scoremap.png"); - cv::imwrite(scoremap_save_name, scoremap_png); - std::cout << "save mask of [" << fname << "] done" << std::endl; - - if (height && width) { - int recover_height = *height; - int recover_width = *width; - cv::Mat recover_png = cv::Mat(recover_height, recover_width, CV_8UC1); - cv::resize(scoremap_png, recover_png, - cv::Size(recover_width, recover_height), 0, 0, cv::INTER_CUBIC); - std::string recover_name = nname + std::string("_recover.png"); - cv::imwrite(recover_name, recover_png); - } - return 0; - } - - int Predictor::native_predict(const std::vector& imgs) { - if (imgs.size() == 0) { - LOG(ERROR) << "No image found"; - return -1; - } - int config_batch_size = _model_config._batch_size; - - int channels = _model_config._channels; - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - std::size_t total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, - static_cast(total_size)); - int batch = total_size / default_batch_size - + ((total_size % default_batch_size) != 0); - int batch_buffer_size = default_batch_size * channels - * eval_width * eval_height; - - auto& input_buffer = _buffer; - auto& org_width = _org_width; - auto& org_height = _org_height; - auto& imgs_batch = _imgs_batch; - - input_buffer.resize(batch_buffer_size); - org_width.resize(default_batch_size); - org_height.resize(default_batch_size); - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } - - int real_buffer_size = batch_size * channels - * eval_width * eval_height; - std::vector feeds; - input_buffer.resize(real_buffer_size); - org_height.resize(batch_size); - org_width.resize(batch_size); - for (int i = 0; i < batch_size; ++i) { - org_width[i] = org_height[i] = 0; - } - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } - if (!_preprocessor->batch_process(imgs_batch, - input_buffer.data(), - org_width.data(), - org_height.data())) { - return -1; - } - paddle::PaddleTensor im_tensor; - im_tensor.name = "image"; - if (!_model_config._use_pr) { - im_tensor.shape = std::vector{ batch_size, channels, - eval_height, eval_width }; - } else { - im_tensor.shape = std::vector{ batch_size, eval_height, - eval_width, channels}; - } - im_tensor.data.Reset(input_buffer.data(), - real_buffer_size * sizeof(float)); - im_tensor.dtype = paddle::PaddleDType::FLOAT32; - feeds.push_back(im_tensor); - _outputs.clear(); - auto t1 = std::chrono::high_resolution_clock::now(); - if (!_main_predictor->Run(feeds, &_outputs, batch_size)) { - LOG(ERROR) << - "Failed: NativePredictor->Run() return false at batch: " - << u; - continue; - } - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = duration_cast - (t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; - int out_num = 1; - // print shape of first output tensor for debugging - std::cout << "size of outputs[" << 
0 << "]: ("; - for (int j = 0; j < _outputs[0].shape.size(); ++j) { - out_num *= _outputs[0].shape[j]; - std::cout << _outputs[0].shape[j] << ","; - } - std::cout << ")" << std::endl; - - size_t nums = _outputs.front().data.length() / sizeof(float); - if (_model_config._use_pr) { - nums = _outputs.front().data.length() / sizeof(int64_t); - } - // size mismatch checking - bool size_mismatch = out_num % batch_size; - size_mismatch |= (!_model_config._use_pr) && (nums != out_num); - size_mismatch |= _model_config._use_pr && (nums != eval_height * eval_width); - if (size_mismatch) { - LOG(ERROR) << "output with a unexpected size"; - return -1; - } - - if (_model_config._use_pr) { - std::vector out_data; - out_data.resize(out_num); - auto addr = reinterpret_cast(_outputs[0].data.data()); - for (int r = 0; r < out_num; ++r) { - out_data[r] = (int)(addr[r]); - } - for (int r = 0; r < batch_size; ++r) { - cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); - mask_png.data = out_data.data() + eval_height*eval_width*r; - auto name = imgs_batch[r]; - auto pos = name.rfind("."); - name[pos] = '_'; - std::string mask_save_name = name + "_mask.png"; - cv::imwrite(mask_save_name, mask_png); - } - continue; - } - - for (int i = 0; i < batch_size; ++i) { - float* output_addr = reinterpret_cast( - _outputs[0].data.data()) - + i * (nums / batch_size); - output_mask(imgs_batch[i], output_addr, - nums / batch_size, - &org_height[i], - &org_width[i]); - } - } - - return 0; - } - - int Predictor::analysis_predict(const std::vector& imgs) { - if (imgs.size() == 0) { - LOG(ERROR) << "No image found"; - return -1; - } - - int config_batch_size = _model_config._batch_size; - int channels = _model_config._channels; - int eval_width = _model_config._resize[0]; - int eval_height = _model_config._resize[1]; - auto total_size = imgs.size(); - int default_batch_size = std::min(config_batch_size, - static_cast(total_size)); - int batch = total_size / default_batch_size - + ((total_size % default_batch_size) != 0); - int batch_buffer_size = default_batch_size * channels - * eval_width * eval_height; - - auto& input_buffer = _buffer; - auto& org_width = _org_width; - auto& org_height = _org_height; - auto& imgs_batch = _imgs_batch; - - input_buffer.resize(batch_buffer_size); - org_width.resize(default_batch_size); - org_height.resize(default_batch_size); - - for (int u = 0; u < batch; ++u) { - int batch_size = default_batch_size; - if (u == (batch - 1) && (total_size % default_batch_size)) { - batch_size = total_size % default_batch_size; - } - - int real_buffer_size = batch_size * channels - * eval_width * eval_height; - std::vector feeds; - input_buffer.resize(real_buffer_size); - org_height.resize(batch_size); - org_width.resize(batch_size); - for (int i = 0; i < batch_size; ++i) { - org_width[i] = org_height[i] = 0; - } - imgs_batch.clear(); - for (int i = 0; i < batch_size; ++i) { - int idx = u * default_batch_size + i; - imgs_batch.push_back(imgs[idx]); - } - - if (!_preprocessor->batch_process(imgs_batch, - input_buffer.data(), - org_width.data(), - org_height.data())) { - return -1; - } - auto im_tensor = _main_predictor->GetInputTensor("image"); - if (!_model_config._use_pr) { - im_tensor->Reshape({ batch_size, channels, - eval_height, eval_width }); - } else { - im_tensor->Reshape({ batch_size, eval_height, - eval_width, channels}); - } - - im_tensor->copy_from_cpu(input_buffer.data()); - - auto t1 = std::chrono::high_resolution_clock::now(); - _main_predictor->ZeroCopyRun(); - auto t2 = 
std::chrono::high_resolution_clock::now(); - auto duration = duration_cast - (t2 - t1).count(); - std::cout << "runtime = " << duration << std::endl; - - auto output_names = _main_predictor->GetOutputNames(); - auto output_t = _main_predictor->GetOutputTensor( - output_names[0]); - std::vector output_shape = output_t->shape(); - - int out_num = 1; - std::cout << "size of outputs[" << 0 << "]: ("; - for (int j = 0; j < output_shape.size(); ++j) { - out_num *= output_shape[j]; - std::cout << output_shape[j] << ","; - } - std::cout << ")" << std::endl; - - if (_model_config._use_pr) { - std::vector out_data; - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - - std::vector mask_data; - mask_data.resize(out_num); - auto addr = reinterpret_cast(out_data.data()); - for (int r = 0; r < out_num; ++r) { - mask_data[r] = (int)(addr[r]); - } - for (int r = 0; r < batch_size; ++r) { - cv::Mat mask_png = cv::Mat(eval_height, eval_width, CV_8UC1); - mask_png.data = mask_data.data() + eval_height*eval_width*r; - auto name = imgs_batch[r]; - auto pos = name.rfind("."); - name[pos] = '_'; - std::string mask_save_name = name + "_mask.png"; - cv::imwrite(mask_save_name, mask_png); - } - continue; - } - - std::vector out_data; - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - for (int i = 0; i < batch_size; ++i) { - float* out_addr = out_data.data() - + (out_num / batch_size) * i; - output_mask(imgs_batch[i], out_addr, out_num / batch_size, - &org_height[i], &org_width[i]); - } - } - return 0; - } -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/predictor/seg_predictor.h b/legacy/deploy/cpp/predictor/seg_predictor.h deleted file mode 100644 index 61f68b76e2..0000000000 --- a/legacy/deploy/cpp/predictor/seg_predictor.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include "utils/seg_conf_parser.h" -#include "utils/utils.h" -#include "preprocessor/preprocessor.h" - -namespace PaddleSolution { -class Predictor { - public: - // init a predictor with a yaml config file - int init(const std::string& conf); - // predict api - int predict(const std::vector& imgs); - private: - int output_mask(const std::string& fname, float* p_out, int length, - int* height = NULL, int* width = NULL); - int native_predict(const std::vector& imgs); - int analysis_predict(const std::vector& imgs); - private: - std::vector _buffer; - std::vector _org_width; - std::vector _org_height; - std::vector _imgs_batch; - std::vector _outputs; - - std::vector _mask; - std::vector _scoremap; - - PaddleSolution::PaddleSegModelConfigPaser _model_config; - std::shared_ptr _preprocessor; - std::unique_ptr _main_predictor; - std::map TRT_MAP = { - {"FP32", paddle::AnalysisConfig::Precision::kFloat32}, - {"FP16", paddle::AnalysisConfig::Precision::kHalf}, - {"INT8", paddle::AnalysisConfig::Precision::kInt8} - }; -}; -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/preprocessor/preprocessor.cpp b/legacy/deploy/cpp/preprocessor/preprocessor.cpp deleted file mode 100644 index 9d6d20b5fb..0000000000 --- a/legacy/deploy/cpp/preprocessor/preprocessor.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include - -#include "preprocessor.h" -#include "preprocessor_seg.h" - - -namespace PaddleSolution { - - std::shared_ptr create_processor( - const std::string& conf_file) { - auto config = std::make_shared(); - if (!config->load_config(conf_file)) { - LOG(FATAL) << "fail to laod conf file [" << conf_file << "]"; - return nullptr; - } - - if (config->_pre_processor == "SegPreProcessor") { - auto p = std::make_shared(); - if (!p->init(config)) { - return nullptr; - } - return p; - } - - LOG(FATAL) << "unknown processor_name [" << config->_pre_processor - << "]"; - - return nullptr; - } -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/preprocessor/preprocessor.h b/legacy/deploy/cpp/preprocessor/preprocessor.h deleted file mode 100644 index 4a1372de61..0000000000 --- a/legacy/deploy/cpp/preprocessor/preprocessor.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include - -#include -#include -#include - -#include "utils/seg_conf_parser.h" - -namespace PaddleSolution { - -class ImagePreProcessor { - protected: - ImagePreProcessor() {} - public: - virtual ~ImagePreProcessor() {} - - virtual bool single_process(const std::string& fname, float* data, - int* ori_w, int* ori_h) = 0; - - virtual bool batch_process(const std::vector& imgs, - float* data, int* ori_w, int* ori_h) = 0; -}; // end of class ImagePreProcessor - -std::shared_ptr create_processor( - const std::string &config_file); - -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/preprocessor/preprocessor_seg.cpp b/legacy/deploy/cpp/preprocessor/preprocessor_seg.cpp deleted file mode 100644 index 11505a1390..0000000000 --- a/legacy/deploy/cpp/preprocessor/preprocessor_seg.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "preprocessor_seg.h" - -#include - -#include - - -namespace PaddleSolution { - - bool SegPreProcessor::single_process(const std::string& fname, - float* data, int* ori_w, int* ori_h) { - cv::Mat im = cv::imread(fname, -1); - if (im.data == nullptr || im.empty()) { - LOG(ERROR) << "Failed to open image: " << fname; - return false; - } - int channels = im.channels(); - *ori_w = im.cols; - *ori_h = im.rows; - - if (channels == 1) { - cv::cvtColor(im, im, cv::COLOR_GRAY2BGR); - } - channels = im.channels(); - if (channels != 3 && channels != 4) { - LOG(ERROR) << "Only support rgb(gray) and rgba image."; - return false; - } - cv::Size resize_size(_config->_resize[0], _config->_resize[1]); - int rw = resize_size.width; - int rh = resize_size.height; - if (*ori_h != rh || *ori_w != rw) { - cv::resize(im, im, resize_size, 0, 0, cv::INTER_LINEAR); - } - - if (!_config->_use_pr) { - utils::normalize(im, data, _config->_mean, _config->_std); - } else { - utils::flatten_mat(im, data); - } - return true; - } - - bool SegPreProcessor::batch_process(const std::vector& imgs, - float* data, int* ori_w, int* ori_h) { - auto ic = _config->_channels; - auto iw = _config->_resize[0]; - auto ih = _config->_resize[1]; - std::vector threads; - for (int i = 0; i < imgs.size(); ++i) { - std::string path = imgs[i]; - float* buffer = data + i * ic * iw * ih; - int* width = &ori_w[i]; - int* height = &ori_h[i]; - threads.emplace_back([this, path, buffer, width, height] { - single_process(path, buffer, width, height); - }); - } - for (auto& t : threads) { - if (t.joinable()) { - t.join(); - } - } - return true; - } - - bool SegPreProcessor::init( - std::shared_ptr config) { - _config = config; - return true; - } -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/preprocessor/preprocessor_seg.h b/legacy/deploy/cpp/preprocessor/preprocessor_seg.h deleted file mode 100644 index 5eba23e555..0000000000 --- 
a/legacy/deploy/cpp/preprocessor/preprocessor_seg.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include "preprocessor.h" -#include "utils/utils.h" - -namespace PaddleSolution { - -class SegPreProcessor : public ImagePreProcessor { - public: - SegPreProcessor() : _config(nullptr) {} - - bool init( - std::shared_ptr config); - - bool single_process(const std::string &fname, float* data, - int* ori_w, int* ori_h); - - bool batch_process(const std::vector& imgs, float* data, - int* ori_w, int* ori_h); - private: - std::shared_ptr _config; -}; -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/tools/visualize.py b/legacy/deploy/cpp/tools/visualize.py deleted file mode 100644 index c249a1db73..0000000000 --- a/legacy/deploy/cpp/tools/visualize.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import cv2 -import sys - -# ColorMap for visualization more clearly -color_map = [[128, 64, 128], [244, 35, 231], [69, 69, 69], [102, 102, 156], - [190, 153, 153], [153, 153, 153], [250, 170, 29], [219, 219, 0], - [106, 142, 35], [152, 250, 152], [69, 129, 180], [219, 19, 60], - [255, 0, 0], [0, 0, 142], [0, 0, 69], [0, 60, 100], [0, 79, 100], - [0, 0, 230], [119, 10, 32]] -# python visualize.py demo1.jpg demo1_jpg.png vis_result.png -if __name__ == "__main__": - if len(sys.argv) != 4: - print( - "Usage: python visualize.py demo1.jpg demo1_jpg.png vis_result.png") - else: - ori_im = cv2.imread(sys.argv[1]) - ori_shape = ori_im.shape - print(ori_shape) - im = cv2.imread(sys.argv[2]) - shape = im.shape - print("visualizing...") - for i in range(0, shape[0]): - for j in range(0, shape[1]): - im[i, j] = color_map[im[i, j, 0]] - im = cv2.resize(im, (ori_shape[1], ori_shape[0])) - cv2.imwrite(sys.argv[3], im) - print("visualizing done!") diff --git a/legacy/deploy/cpp/utils/seg_conf_parser.h b/legacy/deploy/cpp/utils/seg_conf_parser.h deleted file mode 100644 index ce1089ae44..0000000000 --- a/legacy/deploy/cpp/utils/seg_conf_parser.h +++ /dev/null @@ -1,262 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -#include - -namespace PaddleSolution { -class PaddleSegModelConfigPaser { - public: - PaddleSegModelConfigPaser() - :_class_num(0), - _channels(0), - _use_gpu(0), - _use_pr(0), - _batch_size(1), - _model_file_name("__model__"), - _param_file_name("__params__") { - } - ~PaddleSegModelConfigPaser() { - } - - void reset() { - _resize.clear(); - _mean.clear(); - _std.clear(); - _img_type.clear(); - _class_num = 0; - _channels = 0; - _use_gpu = 0; - _use_pr = 0; - _batch_size = 1; - _model_file_name.clear(); - _model_path.clear(); - _param_file_name.clear(); - _trt_mode.clear(); - } - - std::string process_parenthesis(const std::string& str) { - if (str.size() < 2) { - return str; - } - std::string nstr(str); - if (str[0] == '(' && str.back() == ')') { - nstr[0] = '['; - nstr[str.size() - 1] = ']'; - } - return nstr; - } - - template - std::vector parse_str_to_vec(const std::string& str) { - std::vector data; - auto node = YAML::Load(str); - for (const auto& item : node) { - data.push_back(item.as()); - } - return data; - } - - bool load_config(const std::string& conf_file) { - reset(); - YAML::Node config; - try { - config = YAML::LoadFile(conf_file); - } catch(...) { - return false; - } - // 1. get resize - if (config["DEPLOY"]["EVAL_CROP_SIZE"].IsDefined()) { - auto str = config["DEPLOY"]["EVAL_CROP_SIZE"].as(); - _resize = parse_str_to_vec(process_parenthesis(str)); - } else { - std::cerr << "Please set EVAL_CROP_SIZE: (xx, xx)" << std::endl; - return false; - } - - // 2. get mean - if (config["DEPLOY"]["MEAN"].IsDefined()) { - for (const auto& item : config["DEPLOY"]["MEAN"]) { - _mean.push_back(item.as()); - } - } else { - std::cerr << "Please set MEAN: [xx, xx, xx]" << std::endl; - return false; - } - - // 3. get std - if(config["DEPLOY"]["STD"].IsDefined()) { - for (const auto& item : config["DEPLOY"]["STD"]) { - _std.push_back(item.as()); - } - } else { - std::cerr << "Please set STD: [xx, xx, xx]" << std::endl; - return false; - } - - // 4. get image type - if (config["DEPLOY"]["IMAGE_TYPE"].IsDefined()) { - _img_type = config["DEPLOY"]["IMAGE_TYPE"].as(); - } else { - std::cerr << "Please set IMAGE_TYPE: \"rgb\" or \"rgba\"" << std::endl; - return false; - } - // 5. get class number - if (config["DEPLOY"]["NUM_CLASSES"].IsDefined()) { - _class_num = config["DEPLOY"]["NUM_CLASSES"].as(); - } else { - std::cerr << "Please set NUM_CLASSES: x" << std::endl; - return false; - } - // 7. set model path - if (config["DEPLOY"]["MODEL_PATH"].IsDefined()) { - _model_path = config["DEPLOY"]["MODEL_PATH"].as(); - } else { - std::cerr << "Please set MODEL_PATH: \"/path/to/model_dir\"" << std::endl; - return false; - } - // 8. get model file_name - if (config["DEPLOY"]["MODEL_FILENAME"].IsDefined()) { - _model_file_name = config["DEPLOY"]["MODEL_FILENAME"].as(); - } else { - _model_file_name = "__model__"; - } - // 9. get model param file name - if (config["DEPLOY"]["PARAMS_FILENAME"].IsDefined()) { - _param_file_name - = config["DEPLOY"]["PARAMS_FILENAME"].as(); - } else { - _param_file_name = "__params__"; - } - // 10. 
get pre_processor - if (config["DEPLOY"]["PRE_PROCESSOR"].IsDefined()) { - _pre_processor = config["DEPLOY"]["PRE_PROCESSOR"].as(); - } else { - std::cerr << "Please set PRE_PROCESSOR: \"DetectionPreProcessor\"" << std::endl; - return false; - } - // 11. use_gpu - if (config["DEPLOY"]["USE_GPU"].IsDefined()) { - _use_gpu = config["DEPLOY"]["USE_GPU"].as(); - } else { - _use_gpu = 0; - } - // 12. predictor_mode - if (config["DEPLOY"]["PREDICTOR_MODE"].IsDefined()) { - _predictor_mode = config["DEPLOY"]["PREDICTOR_MODE"].as(); - } else { - std::cerr << "Please set PREDICTOR_MODE: \"NATIVE\" or \"ANALYSIS\"" << std::endl; - return false; - } - // 13. batch_size - if (config["DEPLOY"]["BATCH_SIZE"].IsDefined()) { - _batch_size = config["DEPLOY"]["BATCH_SIZE"].as(); - } else { - _batch_size = 1; - } - // 14. channels - if (config["DEPLOY"]["CHANNELS"].IsDefined()) { - _channels = config["DEPLOY"]["CHANNELS"].as(); - } else { - std::cerr << "Please set CHANNELS: x" << std::endl; - return false; - } - // 15. use_pr - if (config["DEPLOY"]["USE_PR"].IsDefined()) { - _use_pr = config["DEPLOY"]["USE_PR"].as(); - } else { - _use_pr = 0; - } - // 16. trt_mode - if (config["DEPLOY"]["TRT_MODE"].IsDefined()) { - _trt_mode = config["DEPLOY"]["TRT_MODE"].as(); - } else { - _trt_mode = ""; - } - return true; - } - - void debug() const { - std::cout << "EVAL_CROP_SIZE: (" - << _resize[0] << ", " << _resize[1] - << ")" << std::endl; - std::cout << "MEAN: ["; - for (int i = 0; i < _mean.size(); ++i) { - if (i != _mean.size() - 1) { - std::cout << _mean[i] << ", "; - } else { - std::cout << _mean[i]; - } - } - std::cout << "]" << std::endl; - - std::cout << "STD: ["; - for (int i = 0; i < _std.size(); ++i) { - if (i != _std.size() - 1) { - std::cout << _std[i] << ", "; - } else { - std::cout << _std[i]; - } - } - std::cout << "]" << std::endl; - - std::cout << "DEPLOY.IMAGE_TYPE: " << _img_type << std::endl; - std::cout << "DEPLOY.NUM_CLASSES: " << _class_num << std::endl; - std::cout << "DEPLOY.CHANNELS: " << _channels << std::endl; - std::cout << "DEPLOY.MODEL_PATH: " << _model_path << std::endl; - std::cout << "DEPLOY.MODEL_FILENAME: " << _model_file_name << std::endl; - std::cout << "DEPLOY.PARAMS_FILENAME: " - << _param_file_name << std::endl; - std::cout << "DEPLOY.PRE_PROCESSOR: " << _pre_processor << std::endl; - std::cout << "DEPLOY.USE_GPU: " << _use_gpu << std::endl; - std::cout << "DEPLOY.PREDICTOR_MODE: " << _predictor_mode << std::endl; - std::cout << "DEPLOY.BATCH_SIZE: " << _batch_size << std::endl; - } - - // DEPLOY.EVAL_CROP_SIZE - std::vector _resize; - // DEPLOY.MEAN - std::vector _mean; - // DEPLOY.STD - std::vector _std; - // DEPLOY.IMAGE_TYPE - std::string _img_type; - // DEPLOY.NUM_CLASSES - int _class_num; - // DEPLOY.CHANNELS - int _channels; - // DEPLOY.MODEL_PATH - std::string _model_path; - // DEPLOY.MODEL_FILENAME - std::string _model_file_name; - // DEPLOY.PARAMS_FILENAME - std::string _param_file_name; - // DEPLOY.PRE_PROCESSOR - std::string _pre_processor; - // DEPLOY.USE_GPU - int _use_gpu; - // DEPLOY.PREDICTOR_MODE - std::string _predictor_mode; - // DEPLOY.BATCH_SIZE - int _batch_size; - // DEPLOY.USE_PR: OP Optimized model - int _use_pr; - // DEPLOY.TRT_MODE: TRT Precesion - std::string _trt_mode; -}; - -} // namespace PaddleSolution diff --git a/legacy/deploy/cpp/utils/utils.h b/legacy/deploy/cpp/utils/utils.h deleted file mode 100644 index 7c81effa7e..0000000000 --- a/legacy/deploy/cpp/utils/utils.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle 
Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include -#include -#include - -#ifdef _WIN32 -#define GLOG_NO_ABBREVIATED_SEVERITIES -#include -#else -#include -#include -#endif - -namespace PaddleSolution { -namespace utils { - inline std::string path_join(const std::string& dir, - const std::string& path) { - std::string seperator = "/"; - #ifdef _WIN32 - seperator = "\\"; - #endif - return dir + seperator + path; - } - #ifndef _WIN32 - // scan a directory and get all files with input extensions - inline std::vector get_directory_images( - const std::string& path, const std::string& exts) { - std::vector imgs; - struct dirent *entry; - DIR *dir = opendir(path.c_str()); - if (dir == NULL) { - closedir(dir); - return imgs; - } - - while ((entry = readdir(dir)) != NULL) { - std::string item = entry->d_name; - auto ext = strrchr(entry->d_name, '.'); - if (!ext || std::string(ext) == "." || std::string(ext) == "..") { - continue; - } - if (exts.find(ext) != std::string::npos) { - imgs.push_back(path_join(path, entry->d_name)); - } - } - return imgs; - } - #else - // scan a directory and get all files with input extensions - inline std::vector get_directory_images( - const std::string& path, const std::string& exts) { - std::string pattern(path); - pattern.append("\\*"); - std::vector imgs; - WIN32_FIND_DATA data; - HANDLE hFind; - if ((hFind = FindFirstFile(pattern.c_str(), &data)) != INVALID_HANDLE_VALUE) { - do { - auto fname = std::string(data.cFileName); - auto pos = fname.rfind("."); - auto ext = fname.substr(pos + 1); - if (ext.size() > 1 && exts.find(ext) != std::string::npos) { - imgs.push_back(path + "\\" + data.cFileName); - } - } while (FindNextFile(hFind, &data) != 0); - FindClose(hFind); - } - return imgs; - } - #endif - - // normalize and HWC_BGR -> CHW_RGB - inline void normalize(cv::Mat& im, float* data, std::vector& fmean, - std::vector& fstd) { - int rh = im.rows; - int rw = im.cols; - int rc = im.channels(); - double normf = static_cast(1.0) / 255.0; - #pragma omp parallel for - for (int h = 0; h < rh; ++h) { - const uchar* ptr = im.ptr(h); - int im_index = 0; - for (int w = 0; w < rw; ++w) { - for (int c = 0; c < rc; ++c) { - int top_index = (c * rh + h) * rw + w; - float pixel = static_cast(ptr[im_index++]); - pixel = (pixel * normf - fmean[c]) / fstd[c]; - data[top_index] = pixel; - } - } - } - } - - // flatten a cv::mat - inline void flatten_mat(cv::Mat& im, float* data) { - int rh = im.rows; - int rw = im.cols; - int rc = im.channels(); - #pragma omp parallel for - for (int h = 0; h < rh; ++h) { - const uchar* ptr = im.ptr(h); - int im_index = 0; - int top_index = h * rw * rc; - for (int w = 0; w < rw; ++w) { - for (int c = 0; c < rc; ++c) { - float pixel = static_cast(ptr[im_index++]); - data[top_index++] = pixel; - } - } - } - } - - // argmax - inline void argmax(float* out, std::vector& shape, - std::vector& mask, std::vector& scoremap) { - int out_img_len = shape[1] * 
shape[2]; - int blob_out_len = out_img_len * shape[0]; - float max_value = -1; - int label = 0; - #pragma omp parallel private(label) - for (int i = 0; i < out_img_len; ++i) { - max_value = -1; - label = 0; - #pragma omp for reduction(max : max_value) - for (int j = 0; j < shape[0]; ++j) { - int index = i + j * out_img_len; - if (index >= blob_out_len) { - continue; - } - float value = out[index]; - if (value > max_value) { - max_value = value; - label = j; - } - } - if (label == 0) max_value = 0; - mask[i] = uchar(label); - scoremap[i] = uchar(max_value * 255); - } - } -} // namespace utils -} // namespace PaddleSolution diff --git a/legacy/deploy/lite/README.md b/legacy/deploy/lite/README.md deleted file mode 100644 index 822f5999af..0000000000 --- a/legacy/deploy/lite/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# 人像分割在移动端的部署 - -## 1.介绍 -以人像分割在安卓端的部署为例,介绍如何使用[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)对分割模型进行移动端的部署。文档第二节介绍如何使用人像分割安卓端的demo,后面几章节介绍如何将PaddleSeg的Model部署到安卓设备。 - -## 2.安卓Demo使用 - -### 2.1 要求 -* Android Studio 3.4; -* Android手机或开发板; - -### 2.2 安装 -* git clone https://github.com/PaddlePaddle/PaddleSeg.git ; -* 打开Android Studio,在"Welcome to Android Studio"窗口点击"Open an existing Android Studio project",在弹出的路径选择窗口中进入"/PaddleSeg/lite/humanseg_android_demo/"目录,然后点击右下角的"Open"按钮即可导入工程,构建工程的过程中会下载demo需要的模型和Lite预测库; -* 通过USB连接Android手机或开发板; -* 载入工程后,点击菜单栏的Run->Run 'App'按钮,在弹出的"Select Deployment Target"窗口选择已经连接的Android设备,然后点击"OK"按钮; -* 在人像分割Demo中,默认会载入一张人像图像,并会在图像下方给出CPU的预测结果; -* 在人像分割Demo中,你还可以通过上方的"Gallery"和"Take Photo"按钮从相册或相机中加载测试图像; - -### 2.3 其他 -此安卓demo基于[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo)开发,更多的细节请参考该repo。
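
The `utils::argmax` helper deleted above walks a CHW score blob, keeps the highest-scoring class per pixel as the label mask, and stores the winning score (scaled to 0–255, zeroed for background) as a score map. A rough NumPy sketch of that post-processing step — assumes NumPy is available and uses a hypothetical function name, not code from the removed files:

```python
# Illustrative sketch (assumes NumPy; names are hypothetical): the per-pixel
# argmax the deleted utils::argmax performed on a CHW score blob.
import numpy as np


def argmax_postprocess(scores: np.ndarray):
    """scores: float array of shape (num_classes, H, W).

    Returns (mask, scoremap): mask holds the winning class id per pixel,
    scoremap the winning score scaled to 0-255; background (label 0)
    is zeroed in the scoremap, as in the removed C++ helper.
    """
    label = scores.argmax(axis=0)          # (H, W) winning class ids
    max_score = scores.max(axis=0)         # (H, W) winning scores
    max_score[label == 0] = 0.0            # suppress background confidence
    mask = label.astype(np.uint8)
    scoremap = (max_score * 255.0).astype(np.uint8)
    return mask, scoremap


if __name__ == "__main__":
    dummy = np.random.rand(2, 4, 4).astype(np.float32)  # 2 classes, 4x4 image
    m, s = argmax_postprocess(dummy)
    print(m.shape, s.shape)
```
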
-*注意:demo中拍照时照片会自动压缩,想测试拍照原图效果,可使用手机相机拍照后从相册中打开进行预测。* - -### 2.4 效果展示 - -## 3.模型导出 -此demo的人像分割模型为[下载链接](https://paddleseg.bj.bcebos.com/models/humanseg_mobilenetv2_1_0_bn_freeze_model_pr_po.zip),是基于Deeplab_v3+mobileNet_v2的humanseg模型,关于humanseg的介绍移步[特色垂类分割模型](../../contrib),更多的分割模型导出可参考:[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v0.2.0/docs/model_export.md) - -## 4.模型转换 - -### 4.1模型转换工具 -准备好PaddleSeg导出来的模型和参数文件后,需要使用Paddle-Lite提供的model_optimize_tool对模型进行优化,并转换成Paddle-Lite支持的文件格式,这里有两种方式来实现: - -* 手动编译model_optimize_tool -详细的模型转换方法参考paddlelite提供的官方文档:[模型转化方法](https://paddlepaddle.github.io/Paddle-Lite/v2.0.0/model_optimize_tool/),从PaddleSeg里面导出来的模型使用model_optimize_tool即可导出model.nb和param.nb文件。 - -* 使用预编译版本的model_optimize_tool,最新的预编译文件参考[release](https://github.com/PaddlePaddle/Paddle-Lite/releases/),此demo使用的版本为[model_optimize_tool](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.0.0/model_optimize_tool) ; - - *注意:如果运行失败,请在[Paddle-Lite源码编译](https://paddlepaddle.github.io/Paddle-Lite/v2.0.0/source_compile/)的开发环境中使用model_optimize_tool* - -### 4.2 更新模型 -将优化好的model.nb和param.nb文件,替换app/src/main/assets/image_segmentation/ -models/deeplab_mobilenet_for_cpu下面的文件即可。 - -## 5. 更新预测库 -Paddle-Lite的编译目前支持Docker,Linux和Mac OS开发环境,建议使用Docker开发环境,以免存在各种依赖问题,同时也提供了预编译版本的预测库。准备Paddle-Lite在安卓端的预测库,主要包括三个文件: - -* PaddlePredictor.jar; -* arm64-v8a/libpaddle_lite_jni.so; -* armeabi-v7a/libpaddle_lite_jni.so; - -下面分别介绍两种方法: - -* 使用预编译版本的预测库,最新的预编译文件参考:[release](https://github.com/PaddlePaddle/Paddle-Lite/releases/),此demo使用的版本: - - * arm64-v8a: [inference_lite_lib.android.armv8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.0.0/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.full_publish.tar.gz) ; - - * armeabi-v7a: [inference_lite_lib.android.armv7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.0.0/inference_lite_lib.android.armv7.gcc.c++_shared.with_extra.full_publish.tar.gz) ; - - 解压上面两个文件,PaddlePredictor.jar位于任一文件夹:inference_lite_lib.android.xxx/java/jar/PaddlePredictor.jar; - - 解压上述inference_lite_lib.android.armv8文件,arm64-v8a/libpaddle_lite_jni.so位于:inference_lite_lib.android.armv8/java/so/libpaddle_lite_jni.so; - - 解压上述inference_lite_lib.android.armv7文件,armeabi-v7a/libpaddle_lite_jni.so位于:inference_lite_lib.android.armv7/java/so/libpaddle_lite_jni.so; - -* 手动编译Paddle-Lite预测库 -开发环境的准备和编译方法参考:[Paddle-Lite源码编译](https://paddlepaddle.github.io/Paddle-Lite/v2.0.0/source_compile/)。 - -准备好上述文件,即可参考[java_api](https://paddlepaddle.github.io/Paddle-Lite/v2.0.0/java_api_doc/)在安卓端进行推理。具体使用预测库的方法可参考[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo)中更新预测库部分的文档。 diff --git a/legacy/deploy/lite/example/human_1.png b/legacy/deploy/lite/example/human_1.png deleted file mode 100644 index a167663bf8..0000000000 Binary files a/legacy/deploy/lite/example/human_1.png and /dev/null differ diff --git a/legacy/deploy/lite/example/human_2.png b/legacy/deploy/lite/example/human_2.png deleted file mode 100644 index 8895e5c0a7..0000000000 Binary files a/legacy/deploy/lite/example/human_2.png and /dev/null differ diff --git a/legacy/deploy/lite/example/human_3.png b/legacy/deploy/lite/example/human_3.png deleted file mode 100644 index 31ba12e450..0000000000 Binary files a/legacy/deploy/lite/example/human_3.png and /dev/null differ diff --git a/legacy/deploy/lite/human_segmentation_demo/.gitignore b/legacy/deploy/lite/human_segmentation_demo/.gitignore deleted file mode 100644 index 2b75303ac5..0000000000 --- 
a/legacy/deploy/lite/human_segmentation_demo/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -*.iml -.gradle -/local.properties -/.idea/caches -/.idea/libraries -/.idea/modules.xml -/.idea/workspace.xml -/.idea/navEditor.xml -/.idea/assetWizardSettings.xml -.DS_Store -/build -/captures -.externalNativeBuild diff --git a/legacy/deploy/lite/human_segmentation_demo/app/.gitignore b/legacy/deploy/lite/human_segmentation_demo/app/.gitignore deleted file mode 100644 index 796b96d1c4..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/build diff --git a/legacy/deploy/lite/human_segmentation_demo/app/build.gradle b/legacy/deploy/lite/human_segmentation_demo/app/build.gradle deleted file mode 100644 index 88d5a19ece..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/build.gradle +++ /dev/null @@ -1,119 +0,0 @@ -import java.security.MessageDigest - -apply plugin: 'com.android.application' - -android { - compileSdkVersion 28 - defaultConfig { - applicationId "com.baidu.paddle.lite.demo.human_segmentation" - minSdkVersion 15 - targetSdkVersion 28 - versionCode 1 - versionName "1.0" - testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" - } - buildTypes { - release { - minifyEnabled false - proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' - } - } -} - -dependencies { - implementation fileTree(include: ['*.jar'], dir: 'libs') - implementation 'com.android.support:appcompat-v7:28.0.0' - implementation 'com.android.support.constraint:constraint-layout:1.1.3' - implementation 'com.android.support:design:28.0.0' - testImplementation 'junit:junit:4.12' - androidTestImplementation 'com.android.support.test:runner:1.0.2' - androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2' - implementation files('libs/PaddlePredictor.jar') -} - -def paddleLiteLibs = 'https://paddlelite-demo.bj.bcebos.com/libs/android/paddle_lite_libs_v2_1_0_bug_fixed.tar.gz' -task downloadAndExtractPaddleLiteLibs(type: DefaultTask) { - doFirst { - println "Downloading and extracting Paddle Lite libs" - } - doLast { - // Prepare cache folder for libs - if (!file("cache").exists()) { - mkdir "cache" - } - // Generate cache name for libs - MessageDigest messageDigest = MessageDigest.getInstance('MD5') - messageDigest.update(paddleLiteLibs.bytes) - String cacheName = new BigInteger(1, messageDigest.digest()).toString(32) - // Download libs - if (!file("cache/${cacheName}.tar.gz").exists()) { - ant.get(src: paddleLiteLibs, dest: file("cache/${cacheName}.tar.gz")) - } - // Unpack libs - copy { - from tarTree("cache/${cacheName}.tar.gz") - into "cache/${cacheName}" - } - // Copy PaddlePredictor.jar - if (!file("libs/PaddlePredictor.jar").exists()) { - copy { - from "cache/${cacheName}/java/PaddlePredictor.jar" - into "libs" - } - } - // Copy libpaddle_lite_jni.so for armeabi-v7a and arm64-v8a - if (!file("src/main/jniLibs/armeabi-v7a/libpaddle_lite_jni.so").exists()) { - copy { - from "cache/${cacheName}/java/libs/armeabi-v7a/" - into "src/main/jniLibs/armeabi-v7a" - } - } - if (!file("src/main/jniLibs/arm64-v8a/libpaddle_lite_jni.so").exists()) { - copy { - from "cache/${cacheName}/java/libs/arm64-v8a/" - into "src/main/jniLibs/arm64-v8a" - } - } - } -} -preBuild.dependsOn downloadAndExtractPaddleLiteLibs - -def paddleLiteModels = [ - [ - 'src' : 'https://paddlelite-demo.bj.bcebos.com/models/deeplab_mobilenet_fp32_for_cpu_v2_1_0.tar.gz', - 'dest' : 
'src/main/assets/image_segmentation/models/deeplab_mobilenet_for_cpu' - ], -] -task downloadAndExtractPaddleLiteModels(type: DefaultTask) { - doFirst { - println "Downloading and extracting Paddle Lite models" - } - doLast { - // Prepare cache folder for models - if (!file("cache").exists()) { - mkdir "cache" - } - paddleLiteModels.eachWithIndex { model, index -> - MessageDigest messageDigest = MessageDigest.getInstance('MD5') - messageDigest.update(model.src.bytes) - String cacheName = new BigInteger(1, messageDigest.digest()).toString(32) - // Download model file - if (!file("cache/${cacheName}.tar.gz").exists()) { - ant.get(src: model.src, dest: file("cache/${cacheName}.tar.gz")) - } - // Unpack model file - copy { - from tarTree("cache/${cacheName}.tar.gz") - into "cache/${cacheName}" - } - // Copy model file - if (!file("${model.dest}/__model__.nb").exists() || !file("${model.dest}/param.nb").exists()) { - copy { - from "cache/${cacheName}" - into "${model.dest}" - } - } - } - } -} -preBuild.dependsOn downloadAndExtractPaddleLiteModels diff --git a/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.jar b/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index f6b961fd5a..0000000000 Binary files a/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.properties b/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 7b5dff50f9..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,6 +0,0 @@ -#Mon Nov 25 17:01:58 CST 2019 -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-5.4.1-all.zip diff --git a/legacy/deploy/lite/human_segmentation_demo/app/gradlew b/legacy/deploy/lite/human_segmentation_demo/app/gradlew deleted file mode 100644 index cccdd3d517..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/gradlew +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env sh - -############################################################################## -## -## Gradle start up script for UN*X -## -############################################################################## - -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - -APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" - -warn () { - echo "$*" -} - -die () { - echo - echo "$*" - echo - exit 1 -} - -# OS specific support (must be 'true' or 'false'). 
-cygwin=false -msys=false -darwin=false -nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" - else - JAVACMD="$JAVA_HOME/bin/java" - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD="java" - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." -fi - -# Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi -fi - -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi - -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi - # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" - fi - i=$((i+1)) - done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac -fi - -# Escape application args -save () { - for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done - echo " " -} -APP_ARGS=$(save "$@") - -# Collect all arguments for the java command, following the shell quoting 
and substitution rules -eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" - -# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong -if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then - cd "$(dirname "$0")" -fi - -exec "$JAVACMD" "$@" diff --git a/legacy/deploy/lite/human_segmentation_demo/app/gradlew.bat b/legacy/deploy/lite/human_segmentation_demo/app/gradlew.bat deleted file mode 100644 index f9553162f1..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/gradlew.bat +++ /dev/null @@ -1,84 +0,0 @@ -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto init - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega diff --git a/legacy/deploy/lite/human_segmentation_demo/app/local.properties b/legacy/deploy/lite/human_segmentation_demo/app/local.properties deleted file mode 100644 index f3bc0d0f53..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/local.properties +++ /dev/null @@ -1,8 +0,0 @@ -## This file must *NOT* be checked into Version Control Systems, -# as it contains information specific to your local configuration. -# -# Location of the SDK. This is only used by Gradle. -# For customization when using a Version Control System, please read the -# header note. 
-#Mon Nov 25 17:01:52 CST 2019 -sdk.dir=/Users/chenlingchi/Library/Android/sdk diff --git a/legacy/deploy/lite/human_segmentation_demo/app/proguard-rules.pro b/legacy/deploy/lite/human_segmentation_demo/app/proguard-rules.pro deleted file mode 100644 index f1b424510d..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/proguard-rules.pro +++ /dev/null @@ -1,21 +0,0 @@ -# Add project specific ProGuard rules here. -# You can control the set of applied configuration files using the -# proguardFiles setting in build.gradle. -# -# For more details, see -# http://developer.android.com/guide/developing/tools/proguard.html - -# If your project uses WebView with JS, uncomment the following -# and specify the fully qualified class name to the JavaScript interface -# class: -#-keepclassmembers class fqcn.of.javascript.interface.for.webview { -# public *; -#} - -# Uncomment this to preserve the line number information for -# debugging stack traces. -#-keepattributes SourceFile,LineNumberTable - -# If you keep the line number information, uncomment this to -# hide the original source file name. -#-renamesourcefileattribute SourceFile diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/androidTest/java/com/baidu/paddle/lite/demo/ExampleInstrumentedTest.java b/legacy/deploy/lite/human_segmentation_demo/app/src/androidTest/java/com/baidu/paddle/lite/demo/ExampleInstrumentedTest.java deleted file mode 100644 index 353c3677e5..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/androidTest/java/com/baidu/paddle/lite/demo/ExampleInstrumentedTest.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.baidu.paddle.lite.demo; - -import android.content.Context; -import android.support.test.InstrumentationRegistry; -import android.support.test.runner.AndroidJUnit4; - -import org.junit.Test; -import org.junit.runner.RunWith; - -import static org.junit.Assert.*; - -/** - * Instrumented test, which will execute on an Android device. - * - * @see Testing documentation - */ -@RunWith(AndroidJUnit4.class) -public class ExampleInstrumentedTest { - @Test - public void useAppContext() { - // Context of the app under test. 
- Context appContext = InstrumentationRegistry.getTargetContext(); - - assertEquals("com.baidu.paddle.lite.demo", appContext.getPackageName()); - } -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/AndroidManifest.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/AndroidManifest.xml deleted file mode 100644 index 39789e0370..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/AndroidManifest.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/images/human.jpg b/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/images/human.jpg deleted file mode 100644 index 234044abb6..0000000000 Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/images/human.jpg and /dev/null differ diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/labels/label_list b/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/labels/label_list deleted file mode 100644 index b043d376d0..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/assets/image_segmentation/labels/label_list +++ /dev/null @@ -1,2 +0,0 @@ -background -human diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/AppCompatPreferenceActivity.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/AppCompatPreferenceActivity.java deleted file mode 100644 index 314c045620..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/AppCompatPreferenceActivity.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.baidu.paddle.lite.demo.segmentation; - -import android.content.res.Configuration; -import android.os.Bundle; -import android.preference.PreferenceActivity; -import android.support.annotation.LayoutRes; -import android.support.annotation.Nullable; -import android.support.v7.app.ActionBar; -import android.support.v7.app.AppCompatDelegate; -import android.support.v7.widget.Toolbar; -import android.view.MenuInflater; -import android.view.View; -import android.view.ViewGroup; - -/** - * A {@link android.preference.PreferenceActivity} which implements and proxies the necessary calls - * to be used with AppCompat. - *

- * This technique can be used with an {@link android.app.Activity} class, not just - * {@link android.preference.PreferenceActivity}. - */ -public abstract class AppCompatPreferenceActivity extends PreferenceActivity { - private AppCompatDelegate mDelegate; - - @Override - protected void onCreate(Bundle savedInstanceState) { - getDelegate().installViewFactory(); - getDelegate().onCreate(savedInstanceState); - super.onCreate(savedInstanceState); - } - - @Override - protected void onPostCreate(Bundle savedInstanceState) { - super.onPostCreate(savedInstanceState); - getDelegate().onPostCreate(savedInstanceState); - } - - public ActionBar getSupportActionBar() { - return getDelegate().getSupportActionBar(); - } - - public void setSupportActionBar(@Nullable Toolbar toolbar) { - getDelegate().setSupportActionBar(toolbar); - } - - @Override - public MenuInflater getMenuInflater() { - return getDelegate().getMenuInflater(); - } - - @Override - public void setContentView(@LayoutRes int layoutResID) { - getDelegate().setContentView(layoutResID); - } - - @Override - public void setContentView(View view) { - getDelegate().setContentView(view); - } - - @Override - public void setContentView(View view, ViewGroup.LayoutParams params) { - getDelegate().setContentView(view, params); - } - - @Override - public void addContentView(View view, ViewGroup.LayoutParams params) { - getDelegate().addContentView(view, params); - } - - @Override - protected void onPostResume() { - super.onPostResume(); - getDelegate().onPostResume(); - } - - @Override - protected void onTitleChanged(CharSequence title, int color) { - super.onTitleChanged(title, color); - getDelegate().setTitle(title); - } - - @Override - public void onConfigurationChanged(Configuration newConfig) { - super.onConfigurationChanged(newConfig); - getDelegate().onConfigurationChanged(newConfig); - } - - @Override - protected void onStop() { - super.onStop(); - getDelegate().onStop(); - } - - @Override - protected void onDestroy() { - super.onDestroy(); - getDelegate().onDestroy(); - } - - public void invalidateOptionsMenu() { - getDelegate().invalidateOptionsMenu(); - } - - private AppCompatDelegate getDelegate() { - if (mDelegate == null) { - mDelegate = AppCompatDelegate.create(this, null); - } - return mDelegate; - } -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/MainActivity.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/MainActivity.java deleted file mode 100644 index aab9f54c30..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/MainActivity.java +++ /dev/null @@ -1,388 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation; - -import android.Manifest; -import android.app.ProgressDialog; -import android.content.ContentResolver; -import android.content.Intent; -import android.content.SharedPreferences; -import android.content.pm.PackageManager; -import android.database.Cursor; -import android.graphics.Bitmap; -import android.graphics.BitmapFactory; -import android.net.Uri; -import android.os.Bundle; -import android.os.Handler; -import android.os.HandlerThread; -import android.os.Message; -import android.preference.PreferenceManager; -import android.provider.MediaStore; -import android.support.annotation.NonNull; -import android.support.v4.app.ActivityCompat; -import android.support.v4.content.ContextCompat; -import 
android.support.v7.app.AppCompatActivity; -import android.text.method.ScrollingMovementMethod; -import android.util.Log; -import android.view.Menu; -import android.view.MenuInflater; -import android.view.MenuItem; -import android.widget.ImageView; -import android.widget.TextView; -import android.widget.Toast; - -import com.baidu.paddle.lite.demo.segmentation.config.Config; -import com.baidu.paddle.lite.demo.segmentation.preprocess.Preprocess; -import com.baidu.paddle.lite.demo.segmentation.visual.Visualize; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; - -public class MainActivity extends AppCompatActivity { - - private static final String TAG = MainActivity.class.getSimpleName(); - public static final int OPEN_GALLERY_REQUEST_CODE = 0; - public static final int TAKE_PHOTO_REQUEST_CODE = 1; - - public static final int REQUEST_LOAD_MODEL = 0; - public static final int REQUEST_RUN_MODEL = 1; - public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0; - public static final int RESPONSE_LOAD_MODEL_FAILED = 1; - public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2; - public static final int RESPONSE_RUN_MODEL_FAILED = 3; - - protected ProgressDialog pbLoadModel = null; - protected ProgressDialog pbRunModel = null; - - protected Handler receiver = null; // receive messages from worker thread - protected Handler sender = null; // send command to worker thread - protected HandlerThread worker = null; // worker thread to load&run model - - - protected TextView tvInputSetting; - protected ImageView ivInputImage; - protected TextView tvOutputResult; - protected TextView tvInferenceTime; - - // model config - Config config = new Config(); - - protected Predictor predictor = new Predictor(); - - Preprocess preprocess = new Preprocess(); - - Visualize visualize = new Visualize(); - - @Override - protected void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - setContentView(R.layout.activity_main); - receiver = new Handler() { - @Override - public void handleMessage(Message msg) { - switch (msg.what) { - case RESPONSE_LOAD_MODEL_SUCCESSED: - pbLoadModel.dismiss(); - onLoadModelSuccessed(); - break; - case RESPONSE_LOAD_MODEL_FAILED: - pbLoadModel.dismiss(); - Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show(); - onLoadModelFailed(); - break; - case RESPONSE_RUN_MODEL_SUCCESSED: - pbRunModel.dismiss(); - onRunModelSuccessed(); - break; - case RESPONSE_RUN_MODEL_FAILED: - pbRunModel.dismiss(); - Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show(); - onRunModelFailed(); - break; - default: - break; - } - } - }; - - worker = new HandlerThread("Predictor Worker"); - worker.start(); - sender = new Handler(worker.getLooper()) { - public void handleMessage(Message msg) { - switch (msg.what) { - case REQUEST_LOAD_MODEL: - // load model and reload test image - if (onLoadModel()) { - receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_SUCCESSED); - } else { - receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_FAILED); - } - break; - case REQUEST_RUN_MODEL: - // run model if model is loaded - if (onRunModel()) { - receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_SUCCESSED); - } else { - receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_FAILED); - } - break; - default: - break; - } - } - }; - - tvInputSetting = findViewById(R.id.tv_input_setting); - ivInputImage = findViewById(R.id.iv_input_image); - tvInferenceTime = findViewById(R.id.tv_inference_time); - tvOutputResult = 
findViewById(R.id.tv_output_result); - tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance()); - tvOutputResult.setMovementMethod(ScrollingMovementMethod.getInstance()); - } - - - public boolean onLoadModel() { - return predictor.init(MainActivity.this, config); - } - - - public boolean onRunModel() { - return predictor.isLoaded() && predictor.runModel(preprocess,visualize); - } - - public void onLoadModelFailed() { - - } - public void onRunModelFailed() { - } - - public void loadModel() { - pbLoadModel = ProgressDialog.show(this, "", "Loading model...", false, false); - sender.sendEmptyMessage(REQUEST_LOAD_MODEL); - } - - public void runModel() { - pbRunModel = ProgressDialog.show(this, "", "Running model...", false, false); - sender.sendEmptyMessage(REQUEST_RUN_MODEL); - } - - public void onLoadModelSuccessed() { - // load test image from file_paths and run model - try { - if (config.imagePath.isEmpty()) { - return; - } - Bitmap image = null; - // read test image file from custom file_paths if the first character of mode file_paths is '/', otherwise read test - // image file from assets - if (!config.imagePath.substring(0, 1).equals("/")) { - InputStream imageStream = getAssets().open(config.imagePath); - image = BitmapFactory.decodeStream(imageStream); - } else { - if (!new File(config.imagePath).exists()) { - return; - } - image = BitmapFactory.decodeFile(config.imagePath); - } - if (image != null && predictor.isLoaded()) { - predictor.setInputImage(image); - runModel(); - } - } catch (IOException e) { - Toast.makeText(MainActivity.this, "Load image failed!", Toast.LENGTH_SHORT).show(); - e.printStackTrace(); - } - } - - public void onRunModelSuccessed() { - // obtain results and update UI - tvInferenceTime.setText("Inference time: " + predictor.inferenceTime() + " ms"); - Bitmap outputImage = predictor.outputImage(); - if (outputImage != null) { - ivInputImage.setImageBitmap(outputImage); - } - tvOutputResult.setText(predictor.outputResult()); - tvOutputResult.scrollTo(0, 0); - } - - - public void onImageChanged(Bitmap image) { - // rerun model if users pick test image from gallery or camera - if (image != null && predictor.isLoaded()) { - predictor.setInputImage(image); - runModel(); - } - } - - public void onImageChanged(String path) { - Bitmap image = BitmapFactory.decodeFile(path); - predictor.setInputImage(image); - runModel(); - } - public void onSettingsClicked() { - startActivity(new Intent(MainActivity.this, SettingsActivity.class)); - } - - @Override - public boolean onCreateOptionsMenu(Menu menu) { - MenuInflater inflater = getMenuInflater(); - inflater.inflate(R.menu.menu_action_options, menu); - return true; - } - - @Override - public boolean onOptionsItemSelected(MenuItem item) { - switch (item.getItemId()) { - case android.R.id.home: - finish(); - break; - case R.id.open_gallery: - if (requestAllPermissions()) { - openGallery(); - } - break; - case R.id.take_photo: - if (requestAllPermissions()) { - takePhoto(); - } - break; - case R.id.settings: - if (requestAllPermissions()) { - // make sure we have SDCard r&w permissions to load model from SDCard - onSettingsClicked(); - } - break; - } - return super.onOptionsItemSelected(item); - } - @Override - public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, - @NonNull int[] grantResults) { - super.onRequestPermissionsResult(requestCode, permissions, grantResults); - if (grantResults[0] != PackageManager.PERMISSION_GRANTED || grantResults[1] != 
PackageManager.PERMISSION_GRANTED) { - Toast.makeText(this, "Permission Denied", Toast.LENGTH_SHORT).show(); - } - } - @Override - protected void onActivityResult(int requestCode, int resultCode, Intent data) { - super.onActivityResult(requestCode, resultCode, data); - if (resultCode == RESULT_OK && data != null) { - switch (requestCode) { - case OPEN_GALLERY_REQUEST_CODE: - try { - ContentResolver resolver = getContentResolver(); - Uri uri = data.getData(); - Bitmap image = MediaStore.Images.Media.getBitmap(resolver, uri); - String[] proj = {MediaStore.Images.Media.DATA}; - Cursor cursor = managedQuery(uri, proj, null, null, null); - cursor.moveToFirst(); - onImageChanged(image); - } catch (IOException e) { - Log.e(TAG, e.toString()); - } - break; - - case TAKE_PHOTO_REQUEST_CODE: - Bitmap image = (Bitmap) data.getParcelableExtra("data"); - onImageChanged(image); - - break; - default: - break; - } - } - } - private boolean requestAllPermissions() { - if (ContextCompat.checkSelfPermission(this, Manifest.permission.WRITE_EXTERNAL_STORAGE) - != PackageManager.PERMISSION_GRANTED || ContextCompat.checkSelfPermission(this, - Manifest.permission.CAMERA) - != PackageManager.PERMISSION_GRANTED) { - ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE, - Manifest.permission.CAMERA}, - 0); - return false; - } - return true; - } - - private void openGallery() { - Intent intent = new Intent(Intent.ACTION_PICK, null); - intent.setDataAndType(MediaStore.Images.Media.EXTERNAL_CONTENT_URI, "image/*"); - startActivityForResult(intent, OPEN_GALLERY_REQUEST_CODE); - } - - private void takePhoto() { - Intent takePhotoIntent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE); - if (takePhotoIntent.resolveActivity(getPackageManager()) != null) { - startActivityForResult(takePhotoIntent, TAKE_PHOTO_REQUEST_CODE); - } - } - - @Override - public boolean onPrepareOptionsMenu(Menu menu) { - boolean isLoaded = predictor.isLoaded(); - menu.findItem(R.id.open_gallery).setEnabled(isLoaded); - menu.findItem(R.id.take_photo).setEnabled(isLoaded); - return super.onPrepareOptionsMenu(menu); - } - - @Override - protected void onResume() { - Log.i(TAG,"begin onResume"); - super.onResume(); - - SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this); - boolean settingsChanged = false; - String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY), - getString(R.string.MODEL_PATH_DEFAULT)); - String label_path = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY), - getString(R.string.LABEL_PATH_DEFAULT)); - String image_path = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY), - getString(R.string.IMAGE_PATH_DEFAULT)); - settingsChanged |= !model_path.equalsIgnoreCase(config.modelPath); - settingsChanged |= !label_path.equalsIgnoreCase(config.labelPath); - settingsChanged |= !image_path.equalsIgnoreCase(config.imagePath); - int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY), - getString(R.string.CPU_THREAD_NUM_DEFAULT))); - settingsChanged |= cpu_thread_num != config.cpuThreadNum; - String cpu_power_mode = - sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY), - getString(R.string.CPU_POWER_MODE_DEFAULT)); - settingsChanged |= !cpu_power_mode.equalsIgnoreCase(config.cpuPowerMode); - String input_color_format = - sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY), - getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); - 
settingsChanged |= !input_color_format.equalsIgnoreCase(config.inputColorFormat); - long[] input_shape = - Utils.parseLongsFromString(sharedPreferences.getString(getString(R.string.INPUT_SHAPE_KEY), - getString(R.string.INPUT_SHAPE_DEFAULT)), ","); - - settingsChanged |= input_shape.length != config.inputShape.length; - - if (!settingsChanged) { - for (int i = 0; i < input_shape.length; i++) { - settingsChanged |= input_shape[i] != config.inputShape[i]; - } - } - - if (settingsChanged) { - config.init(model_path,label_path,image_path,cpu_thread_num,cpu_power_mode, - input_color_format,input_shape); - preprocess.init(config); - // update UI - tvInputSetting.setText("Model: " + config.modelPath.substring(config.modelPath.lastIndexOf("/") + 1) + "\n" + "CPU" + - " Thread Num: " + Integer.toString(config.cpuThreadNum) + "\n" + "CPU Power Mode: " + config.cpuPowerMode); - tvInputSetting.scrollTo(0, 0); - // reload model if configure has been changed - loadModel(); - } - } - - @Override - protected void onDestroy() { - if (predictor != null) { - predictor.releaseModel(); - } - worker.quit(); - super.onDestroy(); - } -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Predictor.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Predictor.java deleted file mode 100644 index 27bfe3544a..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Predictor.java +++ /dev/null @@ -1,292 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation; - -import android.content.Context; -import android.graphics.Bitmap; -import android.util.Log; - -import com.baidu.paddle.lite.MobileConfig; -import com.baidu.paddle.lite.PaddlePredictor; -import com.baidu.paddle.lite.PowerMode; -import com.baidu.paddle.lite.Tensor; -import com.baidu.paddle.lite.demo.segmentation.config.Config; - -import com.baidu.paddle.lite.demo.segmentation.preprocess.Preprocess; -import com.baidu.paddle.lite.demo.segmentation.visual.Visualize; - -import java.io.InputStream; -import java.util.Date; -import java.util.Vector; - -public class Predictor { - private static final String TAG = Predictor.class.getSimpleName(); - protected Vector wordLabels = new Vector(); - - Config config = new Config(); - - protected Bitmap inputImage = null; - protected Bitmap scaledImage = null; - protected Bitmap outputImage = null; - protected String outputResult = ""; - protected float preprocessTime = 0; - protected float postprocessTime = 0; - - public boolean isLoaded = false; - public int warmupIterNum = 0; - public int inferIterNum = 1; - protected Context appCtx = null; - public int cpuThreadNum = 1; - public String cpuPowerMode = "LITE_POWER_HIGH"; - public String modelPath = ""; - public String modelName = ""; - protected PaddlePredictor paddlePredictor = null; - protected float inferenceTime = 0; - - public Predictor() { - super(); - } - - public boolean init(Context appCtx, String modelPath, int cpuThreadNum, String cpuPowerMode) { - this.appCtx = appCtx; - isLoaded = loadModel(modelPath, cpuThreadNum, cpuPowerMode); - return isLoaded; - } - - public boolean init(Context appCtx, Config config) { - - if (config.inputShape.length != 4) { - Log.i(TAG, "size of input shape should be: 4"); - return false; - } - if (config.inputShape[0] != 1) { - Log.i(TAG, "only one batch is supported in the image classification demo, you can use any batch size in " + - "your Apps!"); - return 
false; - } - if (config.inputShape[1] != 1 && config.inputShape[1] != 3) { - Log.i(TAG, "only one/three channels are supported in the image classification demo, you can use any " + - "channel size in your Apps!"); - return false; - } - if (!config.inputColorFormat.equalsIgnoreCase("RGB") && !config.inputColorFormat.equalsIgnoreCase("BGR")) { - Log.i(TAG, "only RGB and BGR color format is supported."); - return false; - } - init(appCtx, config.modelPath, config.cpuThreadNum, config.cpuPowerMode); - - if (!isLoaded()) { - return false; - } - this.config = config; - - return isLoaded; - } - - - public boolean isLoaded() { - return paddlePredictor != null && isLoaded; - } - - protected boolean loadLabel(String labelPath) { - wordLabels.clear(); - // load word labels from file - try { - InputStream assetsInputStream = appCtx.getAssets().open(labelPath); - int available = assetsInputStream.available(); - byte[] lines = new byte[available]; - assetsInputStream.read(lines); - assetsInputStream.close(); - String words = new String(lines); - String[] contents = words.split("\n"); - for (String content : contents) { - wordLabels.add(content); - } - Log.i(TAG, "word label size: " + wordLabels.size()); - } catch (Exception e) { - Log.e(TAG, e.getMessage()); - return false; - } - return true; - } - - public Tensor getInput(int idx) { - if (!isLoaded()) { - return null; - } - return paddlePredictor.getInput(idx); - } - - public Tensor getOutput(int idx) { - if (!isLoaded()) { - return null; - } - return paddlePredictor.getOutput(idx); - } - - protected boolean loadModel(String modelPath, int cpuThreadNum, String cpuPowerMode) { - // release model if exists - releaseModel(); - - // load model - if (modelPath.isEmpty()) { - return false; - } - String realPath = modelPath; - if (!modelPath.substring(0, 1).equals("/")) { - // read model files from custom file_paths if the first character of mode file_paths is '/' - // otherwise copy model to cache from assets - realPath = appCtx.getCacheDir() + "/" + modelPath; - Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath); - } - if (realPath.isEmpty()) { - return false; - } - MobileConfig modelConfig = new MobileConfig(); - modelConfig.setModelDir(realPath); - modelConfig.setThreads(cpuThreadNum); - if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_HIGH")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_HIGH); - } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_LOW")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_LOW); - } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_FULL")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_FULL); - } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_NO_BIND")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_NO_BIND); - } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_RAND_HIGH")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_RAND_HIGH); - } else if (cpuPowerMode.equalsIgnoreCase("LITE_POWER_RAND_LOW")) { - modelConfig.setPowerMode(PowerMode.LITE_POWER_RAND_LOW); - } else { - Log.e(TAG, "unknown cpu power mode!"); - return false; - } - paddlePredictor = PaddlePredictor.createPaddlePredictor(modelConfig); - this.cpuThreadNum = cpuThreadNum; - this.cpuPowerMode = cpuPowerMode; - this.modelPath = realPath; - this.modelName = realPath.substring(realPath.lastIndexOf("/") + 1); - return true; - } - - public boolean runModel() { - if (!isLoaded()) { - return false; - } - // warm up - for (int i = 0; i < warmupIterNum; i++){ - paddlePredictor.run(); - } - // inference - Date start = new Date(); - for (int i = 0; 
i < inferIterNum; i++) { - paddlePredictor.run(); - } - Date end = new Date(); - inferenceTime = (end.getTime() - start.getTime()) / (float) inferIterNum; - return true; - } - - public boolean runModel(Bitmap image) { - setInputImage(image); - return runModel(); - } - - public boolean runModel(Preprocess preprocess, Visualize visualize) { - if (inputImage == null) { - return false; - } - - // set input shape - Tensor inputTensor = getInput(0); - inputTensor.resize(config.inputShape); - - // pre-process image - Date start = new Date(); - - preprocess.init(config); - preprocess.to_array(scaledImage); - - // feed input tensor with pre-processed data - inputTensor.setData(preprocess.inputData); - - Date end = new Date(); - preprocessTime = (float) (end.getTime() - start.getTime()); - - // inference - runModel(); - - start = new Date(); - Tensor outputTensor = getOutput(0); - - // post-process - this.outputImage = visualize.draw(inputImage, outputTensor); - postprocessTime = (float) (end.getTime() - start.getTime()); - - outputResult = new String(); - - return true; - } - public void releaseModel() { - paddlePredictor = null; - isLoaded = false; - cpuThreadNum = 1; - cpuPowerMode = "LITE_POWER_HIGH"; - modelPath = ""; - modelName = ""; - } - - public void setConfig(Config config){ - this.config = config; - } - - public Bitmap inputImage() { - return inputImage; - } - - public Bitmap outputImage() { - return outputImage; - } - - public String outputResult() { - return outputResult; - } - - public float preprocessTime() { - return preprocessTime; - } - - public float postprocessTime() { - return postprocessTime; - } - - public String modelPath() { - return modelPath; - } - - public String modelName() { - return modelName; - } - - public int cpuThreadNum() { - return cpuThreadNum; - } - - public String cpuPowerMode() { - return cpuPowerMode; - } - - public float inferenceTime() { - return inferenceTime; - } - - public void setInputImage(Bitmap image) { - if (image == null) { - return; - } - // scale image to the size of input tensor - Bitmap rgbaImage = image.copy(Bitmap.Config.ARGB_8888, true); - Bitmap scaleImage = Bitmap.createScaledBitmap(rgbaImage, (int) this.config.inputShape[3], (int) this.config.inputShape[2], true); - this.inputImage = rgbaImage; - this.scaledImage = scaleImage; - } - -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/SettingsActivity.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/SettingsActivity.java deleted file mode 100644 index 8f53974d48..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/SettingsActivity.java +++ /dev/null @@ -1,158 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation; - -import android.content.SharedPreferences; -import android.os.Bundle; -import android.preference.CheckBoxPreference; -import android.preference.EditTextPreference; -import android.preference.ListPreference; -import android.support.v7.app.ActionBar; - -import java.util.ArrayList; -import java.util.List; - -public class SettingsActivity extends AppCompatPreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener { - ListPreference lpChoosePreInstalledModel = null; - CheckBoxPreference cbEnableCustomSettings = null; - EditTextPreference etModelPath = null; - EditTextPreference etLabelPath = null; - EditTextPreference etImagePath = null; - ListPreference lpCPUThreadNum = 
null; - ListPreference lpCPUPowerMode = null; - ListPreference lpInputColorFormat = null; - - - - List preInstalledModelPaths = null; - List preInstalledLabelPaths = null; - List preInstalledImagePaths = null; - List preInstalledCPUThreadNums = null; - List preInstalledCPUPowerModes = null; - List preInstalledInputColorFormats = null; - - - @Override - public void onCreate(Bundle savedInstanceState) { - super.onCreate(savedInstanceState); - addPreferencesFromResource(R.xml.settings); - ActionBar supportActionBar = getSupportActionBar(); - if (supportActionBar != null) { - supportActionBar.setDisplayHomeAsUpEnabled(true); - } - - // initialized pre-installed models - preInstalledModelPaths = new ArrayList(); - preInstalledLabelPaths = new ArrayList(); - preInstalledImagePaths = new ArrayList(); - - preInstalledCPUThreadNums = new ArrayList(); - preInstalledCPUPowerModes = new ArrayList(); - preInstalledInputColorFormats = new ArrayList(); - // add deeplab_mobilenet_for_cpu - preInstalledModelPaths.add(getString(R.string.MODEL_PATH_DEFAULT)); - preInstalledLabelPaths.add(getString(R.string.LABEL_PATH_DEFAULT)); - preInstalledImagePaths.add(getString(R.string.IMAGE_PATH_DEFAULT)); - preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT)); - preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT)); - preInstalledInputColorFormats.add(getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); - // initialize UI components - lpChoosePreInstalledModel = - (ListPreference) findPreference(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY)); - String[] preInstalledModelNames = new String[preInstalledModelPaths.size()]; - for (int i = 0; i < preInstalledModelPaths.size(); i++) { - preInstalledModelNames[i] = - preInstalledModelPaths.get(i).substring(preInstalledModelPaths.get(i).lastIndexOf("/") + 1); - } - lpChoosePreInstalledModel.setEntries(preInstalledModelNames); - lpChoosePreInstalledModel.setEntryValues(preInstalledModelPaths.toArray(new String[preInstalledModelPaths.size()])); - cbEnableCustomSettings = - (CheckBoxPreference) findPreference(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY)); - etModelPath = (EditTextPreference) findPreference(getString(R.string.MODEL_PATH_KEY)); - etModelPath.setTitle("Model Path (SDCard: " + Utils.getSDCardDirectory() + ")"); - etLabelPath = (EditTextPreference) findPreference(getString(R.string.LABEL_PATH_KEY)); - etImagePath = (EditTextPreference) findPreference(getString(R.string.IMAGE_PATH_KEY)); - lpCPUThreadNum = - (ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY)); - lpCPUPowerMode = - (ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY)); - lpInputColorFormat = - (ListPreference) findPreference(getString(R.string.INPUT_COLOR_FORMAT_KEY)); - } - - private void reloadPreferenceAndUpdateUI() { - SharedPreferences sharedPreferences = getPreferenceScreen().getSharedPreferences(); - boolean enableCustomSettings = - sharedPreferences.getBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false); - String modelPath = sharedPreferences.getString(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY), - getString(R.string.MODEL_PATH_DEFAULT)); - int modelIdx = lpChoosePreInstalledModel.findIndexOfValue(modelPath); - if (modelIdx >= 0 && modelIdx < preInstalledModelPaths.size()) { - if (!enableCustomSettings) { - SharedPreferences.Editor editor = sharedPreferences.edit(); - editor.putString(getString(R.string.MODEL_PATH_KEY), preInstalledModelPaths.get(modelIdx)); - 
editor.putString(getString(R.string.LABEL_PATH_KEY), preInstalledLabelPaths.get(modelIdx)); - editor.putString(getString(R.string.IMAGE_PATH_KEY), preInstalledImagePaths.get(modelIdx)); - editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx)); - editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx)); - editor.putString(getString(R.string.INPUT_COLOR_FORMAT_KEY), - preInstalledInputColorFormats.get(modelIdx)); - editor.commit(); - } - lpChoosePreInstalledModel.setSummary(modelPath); - } - cbEnableCustomSettings.setChecked(enableCustomSettings); - etModelPath.setEnabled(enableCustomSettings); - etLabelPath.setEnabled(enableCustomSettings); - etImagePath.setEnabled(enableCustomSettings); - lpCPUThreadNum.setEnabled(enableCustomSettings); - lpCPUPowerMode.setEnabled(enableCustomSettings); - lpInputColorFormat.setEnabled(enableCustomSettings); - modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY), - getString(R.string.MODEL_PATH_DEFAULT)); - String labelPath = sharedPreferences.getString(getString(R.string.LABEL_PATH_KEY), - getString(R.string.LABEL_PATH_DEFAULT)); - String imagePath = sharedPreferences.getString(getString(R.string.IMAGE_PATH_KEY), - getString(R.string.IMAGE_PATH_DEFAULT)); - String cpuThreadNum = sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY), - getString(R.string.CPU_THREAD_NUM_DEFAULT)); - String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY), - getString(R.string.CPU_POWER_MODE_DEFAULT)); - String inputColorFormat = sharedPreferences.getString(getString(R.string.INPUT_COLOR_FORMAT_KEY), - getString(R.string.INPUT_COLOR_FORMAT_DEFAULT)); - etModelPath.setSummary(modelPath); - etModelPath.setText(modelPath); - etLabelPath.setSummary(labelPath); - etLabelPath.setText(labelPath); - etImagePath.setSummary(imagePath); - etImagePath.setText(imagePath); - lpCPUThreadNum.setValue(cpuThreadNum); - lpCPUThreadNum.setSummary(cpuThreadNum); - lpCPUPowerMode.setValue(cpuPowerMode); - lpCPUPowerMode.setSummary(cpuPowerMode); - lpInputColorFormat.setValue(inputColorFormat); - lpInputColorFormat.setSummary(inputColorFormat); - - } - - @Override - protected void onResume() { - super.onResume(); - getPreferenceScreen().getSharedPreferences().registerOnSharedPreferenceChangeListener(this); - reloadPreferenceAndUpdateUI(); - } - - @Override - protected void onPause() { - super.onPause(); - getPreferenceScreen().getSharedPreferences().unregisterOnSharedPreferenceChangeListener(this); - } - - @Override - public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) { - if (key.equals(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY))) { - SharedPreferences.Editor editor = sharedPreferences.edit(); - editor.putBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false); - editor.commit(); - } - reloadPreferenceAndUpdateUI(); - } -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Utils.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Utils.java deleted file mode 100644 index 3d581592df..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/Utils.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation; - -import android.content.Context; -import android.os.Environment; - -import java.io.*; - 
-public class Utils { - private static final String TAG = Utils.class.getSimpleName(); - - public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) { - if (srcPath.isEmpty() || dstPath.isEmpty()) { - return; - } - InputStream is = null; - OutputStream os = null; - try { - is = new BufferedInputStream(appCtx.getAssets().open(srcPath)); - os = new BufferedOutputStream(new FileOutputStream(new File(dstPath))); - byte[] buffer = new byte[1024]; - int length = 0; - while ((length = is.read(buffer)) != -1) { - os.write(buffer, 0, length); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } finally { - try { - os.close(); - is.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) { - if (srcDir.isEmpty() || dstDir.isEmpty()) { - return; - } - try { - if (!new File(dstDir).exists()) { - new File(dstDir).mkdirs(); - } - for (String fileName : appCtx.getAssets().list(srcDir)) { - String srcSubPath = srcDir + File.separator + fileName; - String dstSubPath = dstDir + File.separator + fileName; - if (new File(srcSubPath).isDirectory()) { - copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath); - } else { - copyFileFromAssets(appCtx, srcSubPath, dstSubPath); - } - } - } catch (Exception e) { - e.printStackTrace(); - } - } - - public static float[] parseFloatsFromString(String string, String delimiter) { - String[] pieces = string.trim().toLowerCase().split(delimiter); - float[] floats = new float[pieces.length]; - for (int i = 0; i < pieces.length; i++) { - floats[i] = Float.parseFloat(pieces[i].trim()); - } - return floats; - } - - public static long[] parseLongsFromString(String string, String delimiter) { - String[] pieces = string.trim().toLowerCase().split(delimiter); - long[] longs = new long[pieces.length]; - for (int i = 0; i < pieces.length; i++) { - longs[i] = Long.parseLong(pieces[i].trim()); - } - return longs; - } - - public static String getSDCardDirectory() { - return Environment.getExternalStorageDirectory().getAbsolutePath(); - } - - public static boolean isSupportedNPU() { - String hardware = android.os.Build.HARDWARE; - return hardware.equalsIgnoreCase("kirin810") || hardware.equalsIgnoreCase("kirin990"); - } -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/config/Config.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/config/Config.java deleted file mode 100644 index 4f09eb53cb..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/config/Config.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation.config; - -import android.graphics.Bitmap; - -public class Config { - - public String modelPath = ""; - public String labelPath = ""; - public String imagePath = ""; - public int cpuThreadNum = 1; - public String cpuPowerMode = ""; - public String inputColorFormat = ""; - public long[] inputShape = new long[]{}; - - - public void init(String modelPath, String labelPath, String imagePath, int cpuThreadNum, - String cpuPowerMode, String inputColorFormat,long[] inputShape){ - - this.modelPath = modelPath; - this.labelPath = labelPath; - this.imagePath = imagePath; - this.cpuThreadNum = cpuThreadNum; - this.cpuPowerMode = cpuPowerMode; - this.inputColorFormat = 
inputColorFormat; - this.inputShape = inputShape; - } - - public void setInputShape(Bitmap inputImage){ - this.inputShape[0] = 1; - this.inputShape[1] = 3; - this.inputShape[2] = inputImage.getHeight(); - this.inputShape[3] = inputImage.getWidth(); - - } - -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/preprocess/Preprocess.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/preprocess/Preprocess.java deleted file mode 100644 index d1ce2b02ca..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/preprocess/Preprocess.java +++ /dev/null @@ -1,73 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation.preprocess; - -import android.graphics.Bitmap; -import android.util.Log; - -import com.baidu.paddle.lite.demo.segmentation.config.Config; - -import static android.graphics.Color.blue; -import static android.graphics.Color.green; -import static android.graphics.Color.red; - -public class Preprocess { - - private static final String TAG = Preprocess.class.getSimpleName(); - - Config config; - int channels; - int width; - int height; - - public float[] inputData; - - public void init(Config config){ - this.config = config; - this.channels = (int) config.inputShape[1]; - this.height = (int) config.inputShape[2]; - this.width = (int) config.inputShape[3]; - this.inputData = new float[channels * width * height]; - } - - public boolean to_array(Bitmap inputImage){ - - if (channels == 3) { - int[] channelIdx = null; - if (config.inputColorFormat.equalsIgnoreCase("RGB")) { - channelIdx = new int[]{0, 1, 2}; - } else if (config.inputColorFormat.equalsIgnoreCase("BGR")) { - channelIdx = new int[]{2, 1, 0}; - } else { - Log.i(TAG, "unknown color format " + config.inputColorFormat + ", only RGB and BGR color format is " + - "supported!"); - return false; - } - int[] channelStride = new int[]{width * height, width * height * 2}; - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - int color = inputImage.getPixel(x, y); - float[] rgb = new float[]{(float) red(color) , (float) green(color) , - (float) blue(color)}; - inputData[y * width + x] = rgb[channelIdx[0]] ; - inputData[y * width + x + channelStride[0]] = rgb[channelIdx[1]] ; - inputData[y * width + x + channelStride[1]] = rgb[channelIdx[2]]; - } - } - } else if (channels == 1) { - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - int color = inputImage.getPixel(x, y); - float gray = (float) (red(color) + green(color) + blue(color)); - inputData[y * width + x] = gray; - } - } - } else { - Log.i(TAG, "unsupported channel size " + Integer.toString(channels) + ", only channel 1 and 3 is " + - "supported!"); - return false; - } - return true; - - } - -} diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/visual/Visualize.java b/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/visual/Visualize.java deleted file mode 100644 index 56a2dd245f..0000000000 --- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/java/com/baidu/paddle/lite/demo/segmentation/visual/Visualize.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.baidu.paddle.lite.demo.segmentation.visual; - -import android.graphics.Bitmap; -import android.graphics.Canvas; -import android.graphics.Matrix; -import android.graphics.Paint; -import 
android.util.Log;
-
-import com.baidu.paddle.lite.Tensor;
-
-public class Visualize {
-    private static final String TAG = Visualize.class.getSimpleName();
-
-    public Bitmap draw(Bitmap inputImage, Tensor outputTensor){
-
-        final int[] colors_map = {0xFF000000, 0xFFFFFF00};
-
-        float[] output = outputTensor.getFloatData();
-        long outputShape[] = outputTensor.shape();
-        long outputSize = 1;
-
-        for (long s : outputShape) {
-            outputSize *= s;
-        }
-
-        int[] objectColor = new int[(int)outputSize];
-
-        for(int i=0;i
[the remainder of the deleted Visualize.java (the colormap loop body, output-bitmap drawing and closing braces) was destroyed by markup stripping and is not recoverable]
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/drawable/ic_launcher_background.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/drawable/ic_launcher_background.xml
deleted file mode 100644
index 0d025f9bf6..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/drawable/ic_launcher_background.xml
+++ /dev/null
@@ -1,170 +0,0 @@
[deleted drawable XML (170 lines); element markup was stripped during extraction and is not recoverable]
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/layout/activity_main.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/layout/activity_main.xml
deleted file mode 100644
index 356b0069df..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/layout/activity_main.xml
+++ /dev/null
@@ -1,99 +0,0 @@
[deleted layout XML (99 lines); element markup was stripped during extraction and is not recoverable]
\ No newline at end of file
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/menu/menu_action_options.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/menu/menu_action_options.xml
deleted file mode 100644
index fe74758ae5..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/menu/menu_action_options.xml
+++ /dev/null
@@ -1,21 +0,0 @@
[deleted menu XML (21 lines); element markup was stripped during extraction and is not recoverable]

diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
deleted file mode 100644
index eca70cfe52..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
+++ /dev/null
@@ -1,5 +0,0 @@
[deleted launcher-icon XML (5 lines); markup stripped during extraction and not recoverable]
\ No newline at end of file
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
deleted file mode 100644
index eca70cfe52..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
+++ /dev/null
@@ -1,5 +0,0 @@
[deleted launcher-icon XML (5 lines); markup stripped during extraction and not recoverable]
\ No newline at end of file
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher.png
deleted file mode 100644
index 898f3ed59a..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png
deleted file mode 100644
index dffca3601e..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-hdpi/ic_launcher_round.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher.png
deleted file mode 100644
index 64ba76f75e..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png
deleted file mode 100644
index dae5e08234..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-mdpi/ic_launcher_round.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png
deleted file mode 100644
index e5ed46597e..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png
deleted file mode 100644
index 14ed0af350..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png
deleted file mode 100644
index b0907cac3b..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png
deleted file mode 100644
index d8ae031549..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png
deleted file mode 100644
index 2c18de9e66..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png
deleted file mode 100644
index beed3cdd2c..0000000000
Binary files a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png and /dev/null differ
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/arrays.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/arrays.xml
deleted file mode 100644
index 8e08ad57dd..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/arrays.xml
+++ /dev/null
@@ -1,39 +0,0 @@
[deleted string-array resources (39 lines); XML markup stripped during extraction. Surviving values, grouped in order:
 1 threads, 2 threads, 4 threads, 8 threads
 1, 2, 4, 8
 HIGH(only big cores), LOW(only LITTLE cores), FULL(all cores), NO_BIND(depends on system), RAND_HIGH, RAND_LOW
 LITE_POWER_HIGH, LITE_POWER_LOW, LITE_POWER_FULL, LITE_POWER_NO_BIND, LITE_POWER_RAND_HIGH, LITE_POWER_RAND_LOW
 BGR color format, RGB color format
 BGR, RGB]
\ No newline at end of file
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/colors.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/colors.xml
deleted file mode 100644
index 69b22338c6..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/colors.xml
+++ /dev/null
@@ -1,6 +0,0 @@
[deleted color resources (6 lines); XML markup stripped during extraction. Surviving values: #008577, #00574B, #D81B60]
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/strings.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/strings.xml
deleted file mode 100644
index d3ad4ecfc6..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/strings.xml
+++ /dev/null
@@ -1,20 +0,0 @@
[deleted string resources (20 lines); XML markup stripped during extraction. Surviving values, in order:
 Human Segmentation
 CHOOSE_PRE_INSTALLED_MODEL_KEY, ENABLE_CUSTOM_SETTINGS_KEY, MODEL_PATH_KEY, LABEL_PATH_KEY, IMAGE_PATH_KEY, CPU_THREAD_NUM_KEY, CPU_POWER_MODE_KEY, INPUT_COLOR_FORMAT_KEY, INPUT_SHAPE_KEY
 image_segmentation/models/deeplab_mobilenet_for_cpu
 image_segmentation/labels/label_list
 image_segmentation/images/human.jpg
 1, LITE_POWER_HIGH, RGB, 1,3,513,513]
diff --git a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/styles.xml b/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/styles.xml
deleted file mode 100644
index 853262016a..0000000000
--- a/legacy/deploy/lite/human_segmentation_demo/app/src/main/res/values/styles.xml
+++ /dev/null
@@ -1,25 +0,0 @@
[deleted style resources (25 lines); XML markup stripped during extraction and not recoverable]