diff --git a/README.MD b/README.MD index 25a6aa1b..b5cbf3b8 100644 --- a/README.MD +++ b/README.MD @@ -116,6 +116,7 @@ When this error has occurred, please check the network environment. ## Update **If the dependency package error after updating, please double clicking ```repair_dependency.bat``` (for Official ComfyUI Protable) or ```repair_dependency_aki.bat``` (for ComfyUI-aki-v1.x) in the plugin folder to reinstall the dependency packages.
+* ObjectDetector nodes add a sort-by-confidence option. * Commit [DrawBBoxMask](#DrawBBoxMask) node, used to convert the BBoxes output by the Object Detector node into a mask. * Commit [UserPromptGeneratorTxtImg](#UserPromptGeneratorTxtImg) and [UserPromptGeneratorReplaceWord](#UserPromptGeneratorReplaceWord) nodes, Used to generate text and image prompts and replace prompt content. * Commit [PhiPrompt](#PhiPrompt) node, Use Microsoft Phi 3.5 text and visual models for local inference. Can be used to generate prompt words, process prompt words, or infer prompt words from images. Running this model requires at least 16GB of video memory. @@ -1786,7 +1787,7 @@ Node Options: * image: The image to segment. * florence2_model: Florence2 model, it from [LoadFlorence2Model](#LoadFlorence2Model) node. * prompt: Describe the object that needs to be identified. -* sort_method: The selection box sorting method has 3 options: "left_to_right", "top_to_bottom" and "big_to_small". +* sort_method: The selection box sorting method has 4 options: "left_to_right", "top_to_bottom", "big_to_small" and "confidence". * bbox_select: Select the input box data. There are three options: "all" to select all, "first" to select the box with the highest confidence, and "by_index" to specify the index of the box. * select_index: This option is valid when bbox_delect is 'by_index'. 0 is the first one. Multiple values can be entered, separated by any non numeric character, including but not limited to commas, periods, semicolons, spaces or letters, and even Chinese. @@ -1800,7 +1801,7 @@ Node Options: * confidence_threshold: The threshold of confidence. * nms_iou_threshold: The threshold of Non-Maximum Suppression. * prompt: Describe the object that needs to be identified. -* sort_method: The selection box sorting method has 3 options: "left_to_right", "top_to_bottom" and "big_to_small". 
+* sort_method: The selection box sorting method has 4 options: "left_to_right", "top_to_bottom", "big_to_small" and "confidence". * bbox_select: Select the input box data. There are three options: "all" to select all, "first" to select the box with the highest confidence, and "by_index" to specify the index of the box. * select_index: This option is valid when bbox_delect is 'by_index'. 0 is the first one. Multiple values can be entered, separated by any non numeric character, including but not limited to commas, periods, semicolons, spaces or letters, and even Chinese. @@ -1812,7 +1813,7 @@ Node Options: ![image](image/object_detector_yolo8_node.jpg) * image: The image to segment. * yolo_model: Choose the yolo model. -* sort_method: The selection box sorting method has 3 options: "left_to_right", "top_to_bottom" and "big_to_small". +* sort_method: The selection box sorting method has 4 options: "left_to_right", "top_to_bottom", "big_to_small" and "confidence". * bbox_select: Select the input box data. There are three options: "all" to select all, "first" to select the box with the highest confidence, and "by_index" to specify the index of the box. * select_index: This option is valid when bbox_delect is 'by_index'. 0 is the first one. Multiple values can be entered, separated by any non numeric character, including but not limited to commas, periods, semicolons, spaces or letters, and even Chinese. @@ -1822,7 +1823,7 @@ Use mask as recognition box data. All areas surrounded by white areas on the mas Node Options: ![image](image/object_detector_mask_node.jpg) * object_mask: The mask input. -* sort_method: The selection box sorting method has 3 options: "left_to_right", "top_to_bottom" and "big_to_small". +* sort_method: The selection box sorting method has 4 options: "left_to_right", "top_to_bottom", "big_to_small" and "confidence". * bbox_select: Select the input box data. 
There are three options: "all" to select all, "first" to select the box with the highest confidence, and "by_index" to specify the index of the box. * select_index: This option is valid when bbox_delect is 'by_index'. 0 is the first one. Multiple values can be entered, separated by any non numeric character, including but not limited to commas, periods, semicolons, spaces or letters, and even Chinese. diff --git a/README_CN.MD b/README_CN.MD index 8ba9cfa0..601da4f2 100644 --- a/README_CN.MD +++ b/README_CN.MD @@ -116,6 +116,7 @@ os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' ## 更新说明 **如果本插件更新后出现依赖包错误,请双击运行插件目录下的```install_requirements.bat```(官方便携包),或 ```install_requirements_aki.bat```(秋叶整合包) 重新安装依赖包。 +* ObjectDetector 节点添加按置信度排序选项。 * 添加 [DrawBBoxMask](#DrawBBoxMask) 节点,用于将 ObjectDetector 节点输出的BBox转为遮罩。 * 添加 [UserPromptGeneratorTxtImg](#UserPromptGeneratorTxtImg) 以及 [UserPromptGeneratorReplaceWord](#UserPromptGeneratorReplaceWord) 节点, 用于生成文生图提示词和替换提示词内容。 * 添加 [PhiPrompt](#PhiPrompt) 节点,使用Micrisoft Phi 3.5文字及视觉模型进行本地推理。可以用于生成提示词,加工提示词或者反推图片的提示词。运行这个模型需要至少16GB的显存。 @@ -1757,7 +1758,7 @@ https://github.com/user-attachments/assets/b2a45c96-4be1-4470-8ceb-addaf301b0cb * image: 图片输入。 * florence2_model: Florence2模型。从[Florence2模型加载器](#LoadFlorence2Model)输入。 * prompt: 描述需要识别的对象。 -* sort_method: 选择框排序方法, 有3个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序。 +* sort_method: 选择框排序方法, 有4个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序,"confidence"为按置信度排序。 * bbox_select: 选择输入的框数据。有3个选项:"all"为全部选择,"first"为选择置信度最高的框,"by_index"可以指定框的索引。 * select_index: 当bbox_select为"by_index"时,此选项有效。0为第一张。可以输入多个值,中间用任意非数字字符分隔,包括不仅限于逗号,句号,分号,空格或者字母,甚至中文。 @@ -1772,7 +1773,7 @@ https://github.com/user-attachments/assets/b2a45c96-4be1-4470-8ceb-addaf301b0cb * confidence_threshold: 置信度阈值。 * nms_iou_threshold: 非极大值抑制阈值。 * prompt: 描述需要识别的对象。 -* sort_method: 选择框排序方法, 有3个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序。 +* sort_method: 
选择框排序方法, 有4个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序,"confidence"为按置信度排序。 * bbox_select: 选择输入的框数据。有3个选项:"all"为全部选择,"first"为选择置信度最高的框,"by_index"可以指定框的索引。 * select_index: 当bbox_select为"by_index"时,此选项有效。0为第一张。可以输入多个值,中间用任意非数字字符分隔,包括不仅限于逗号,句号,分号,空格或者字母,甚至中文。 @@ -1784,7 +1785,7 @@ https://github.com/user-attachments/assets/b2a45c96-4be1-4470-8ceb-addaf301b0cb ![image](image/object_detector_yolo8_node.jpg) * image: 图片输入。 * yolo_model: 选择yolo模型。 -* sort_method: 选择框排序方法, 有3个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序。 +* sort_method: 选择框排序方法, 有4个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序,"confidence"为按置信度排序。 * bbox_select: 选择输入的框数据。有3个选项:"all"为全部选择,"first"为选择置信度最高的框,"by_index"可以指定框的索引。 * select_index: 当bbox_select为"by_index"时,此选项有效。0为第一张。可以输入多个值,中间用任意非数字字符分隔,包括不仅限于逗号,句号,分号,空格或者字母,甚至中文。 @@ -1794,7 +1795,7 @@ https://github.com/user-attachments/assets/b2a45c96-4be1-4470-8ceb-addaf301b0cb 节点选项说明: ![image](image/object_detector_mask_node.jpg) * object_mask: 遮罩输入。 -* sort_method: 选择框排序方法, 有3个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序。 +* sort_method: 选择框排序方法, 有4个选项:"left_to_right"为从左到右排序,"top_to_bottom"为从上到下排序,"big_to_small"为从大到小排序,"confidence"为默认排序。 * bbox_select: 选择输入的框数据。有3个选项:"all"为全部选择,"first"为选择置信度最高的框,"by_index"可以指定框的索引。 * select_index: 当bbox_select为"by_index"时,此选项有效。0为第一张。可以输入多个值,中间用任意非数字字符分隔,包括不仅限于逗号,句号,分号,空格或者字母,甚至中文。 diff --git a/py/object_detector.py b/py/object_detector.py index 17335041..f97ec801 100644 --- a/py/object_detector.py +++ b/py/object_detector.py @@ -2,7 +2,7 @@ from .imagefunc import * select_list = ["all", "first", "by_index"] -sort_method_list = ["left_to_right", "top_to_bottom", "big_to_small"] +sort_method_list = ["left_to_right", "top_to_bottom", "big_to_small", "confidence"] # 规范bbox,保证x1 < x2, y1 < y2, 并返回int @@ -22,8 +22,10 @@ def sort_bboxes(bboxes:list, method:str) -> list: sorted_bboxes = sorted(bboxes, key=lambda bbox: bbox[0]) 
elif method == "top_to_bottom": sorted_bboxes = sorted(bboxes, key=lambda bbox: bbox[1]) - else:# bit_to_small + elif method == "big_to_small": sorted_bboxes = sorted(bboxes, key=lambda bbox: (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]), reverse=True) + else: + sorted_bboxes = bboxes return sorted_bboxes def select_bboxes(bboxes:list, bbox_select:str, select_index:str) -> list: diff --git a/pyproject.toml b/pyproject.toml index dcf7edb8..16712df8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "comfyui_layerstyle" description = "A set of nodes for ComfyUI it generate image like Adobe Photoshop's Layer Style. the Drop Shadow is first completed node, and follow-up work is in progress." -version = "1.0.58" +version = "1.0.59" license = "MIT" dependencies = ["numpy", "pillow", "torch", "matplotlib", "Scipy", "scikit_image", "scikit_learn", "opencv-contrib-python", "pymatting", "segment_anything", "timm", "addict", "yapf", "colour-science", "wget", "mediapipe", "loguru", "typer_config", "fastapi", "rich", "google-generativeai", "diffusers", "omegaconf", "tqdm", "transformers", "kornia", "image-reward", "ultralytics", "blend_modes", "blind-watermark", "qrcode", "pyzbar", "transparent-background", "huggingface_hub", "accelerate", "bitsandbytes", "torchscale", "wandb", "hydra-core", "psd-tools", "inference-cli[yolo-world]", "inference-gpu[yolo-world]", "onnxruntime"]