From 74ac5b8dcc8d3b98a9422a801a31976724ee18cc Mon Sep 17 00:00:00 2001 From: Kay Date: Mon, 4 Nov 2024 01:58:27 +0800 Subject: [PATCH 1/3] fix: tree diff string replacement when something is substring of another --- bigtree/tree/helper.py | 51 +++++++++++++++++++++++++++++++++------ tests/tree/test_helper.py | 10 ++++++++ 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/bigtree/tree/helper.py b/bigtree/tree/helper.py index c80ac08c..12b08af9 100644 --- a/bigtree/tree/helper.py +++ b/bigtree/tree/helper.py @@ -5,6 +5,13 @@ from bigtree.tree import construct, export, search from bigtree.utils import assertions, exceptions, iterators +try: + import pandas as pd +except ImportError: # pragma: no cover + from unittest.mock import MagicMock + + pd = MagicMock() + __all__ = ["clone_tree", "get_subtree", "prune_tree", "get_tree_diff"] BaseNodeT = TypeVar("BaseNodeT", bound=basenode.BaseNode) BinaryNodeT = TypeVar("BinaryNodeT", bound=binarynode.BinaryNode) @@ -237,6 +244,7 @@ def prune_tree( return tree_copy +@exceptions.optional_dependencies_pandas def get_tree_diff( tree: node.Node, other_tree: node.Node, @@ -376,6 +384,7 @@ def get_tree_diff( name_col = "name" path_col = "PATH" indicator_col = "Exists" + tree_sep = tree.sep data, data_other = ( export.tree_to_dataframe( @@ -406,11 +415,12 @@ def get_tree_diff( moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))] moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))] if detail: - _sep = tree.sep node_names_removed = [ - node_removed.split(_sep)[-1] for node_removed in nodes_removed + node_removed.split(tree_sep)[-1] for node_removed in nodes_removed + ] + node_names_added = [ + node_added.split(tree_sep)[-1] for node_added in nodes_added ] - node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added] moved_from_indicator = [ node_name_removed in node_names_added for node_name_removed in node_names_removed @@ -420,6 +430,27 @@ def get_tree_diff( for node_name_added in node_names_added ] + def add_suffix_to_path( + _data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str + ) -> pd.DataFrame: + """Add suffix to path string + + Args: + _data (pd.DataFrame): original data with path column + _condition (pd.Series): whether to add suffix, contains True/False values + _original_name (str): path prefix to add suffix to + _suffix (str): suffix to add to path column + + Returns: + (pd.DataFrame) + """ + data_replace = _data[_condition] + data_replace[path_col] = data_replace[path_col].str.replace( + _original_name, f"{_original_name} ({suffix})", regex=True + ) + data_not_replace = _data[~_condition] + return data_replace._append(data_not_replace).sort_index() + for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator): if not detail: suffix = "-" @@ -427,8 +458,11 @@ def get_tree_diff( suffix = "moved from" else: suffix = "removed" - data_both[path_col] = data_both[path_col].str.replace( - node_removed, f"{node_removed} ({suffix})", regex=True + condition_node_removed = data_both[path_col].str.endswith( + node_removed + ) | data_both[path_col].str.contains(node_removed + tree_sep) + data_both = add_suffix_to_path( + data_both, condition_node_removed, node_removed, suffix ) for node_added, move_indicator in zip(nodes_added, moved_to_indicator): if not detail: @@ -437,8 +471,11 @@ def get_tree_diff( suffix = "moved to" else: suffix = "added" - data_both[path_col] = data_both[path_col].str.replace( - node_added, f"{node_added} ({suffix})", regex=True + condition_node_added = data_both[path_col].str.endswith(node_added) | data_both[ + path_col + ].str.contains(node_added + tree_sep) + data_both = add_suffix_to_path( + data_both, condition_node_added, node_added, suffix ) # Check tree attribute difference diff --git a/tests/tree/test_helper.py b/tests/tree/test_helper.py index 74cecfce..ba7d26ff 100644 --- a/tests/tree/test_helper.py +++ b/tests/tree/test_helper.py @@ -235,6 +235,16 @@ def test_tree_diff(tree_node): ) assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff) + @staticmethod + def test_tree_diff_same_prefix(): + tree_node = node.Node( + "a", children=[node.Node("bb", children=[node.Node("b")])] + ) + other_tree_node = node.Node("a", children=[node.Node("b")]) + tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node) + expected_str = "a\n" "├── b (+)\n" "└── bb (-)\n" " └── b (-)" + assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff) + @staticmethod def test_tree_diff_diff_sep_error(tree_node): other_tree_node = helper.prune_tree(tree_node, "a/c") From 475f4dccb6981642c0a4dfa298815bc458d3bd40 Mon Sep 17 00:00:00 2001 From: Kay Date: Mon, 4 Nov 2024 02:00:17 +0800 Subject: [PATCH 2/3] fix: update CHANGELOG --- CHANGELOG.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 581f1c88..44713d12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed: - Misc: Doctest for docstrings, docstring to indicate usage prefers `node_name` to `name`. - Tree Export: Mermaid diagram title to add newline. +- Tree Helper: Get tree diff string replacement bug when the path change is substring of another path. ## [0.22.1] - 2024-11-03 ### Added: @@ -21,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.22.0] - 2024-11-03 ### Added: -- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default it is false. +- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default +it is false. ## [0.21.3] - 2024-10-16 ### Added: @@ -96,10 +98,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.19.0] - 2024-06-15 ### Changed: -- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more -object-oriented way of parsing arguments. -This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`. -The argument `custom_style` is deprecated, and argument `style` is used instead. +- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more object-oriented +way of parsing arguments. This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`. The +argument `custom_style` is deprecated, and argument `style` is used instead. **This might not be backwards-compatible!** - Misc: Update docstrings to be more comprehensive for tree constructor and tree exporter. - Misc: Update documentation badges and conda information. From 3841978bea66069e65995c4ab539ff0a6a4747c3 Mon Sep 17 00:00:00 2001 From: Kay Date: Mon, 4 Nov 2024 02:06:50 +0800 Subject: [PATCH 3/3] fix: test case --- tests/tree/test_helper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/tree/test_helper.py b/tests/tree/test_helper.py index ba7d26ff..8526896d 100644 --- a/tests/tree/test_helper.py +++ b/tests/tree/test_helper.py @@ -242,7 +242,14 @@ def test_tree_diff_same_prefix(): ) other_tree_node = node.Node("a", children=[node.Node("b")]) tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node) - expected_str = "a\n" "├── b (+)\n" "└── bb (-)\n" " └── b (-)" + # fmt: off + expected_str = ( + "a\n" + "├── b (+)\n" + "└── bb (-)\n" + " └── b (-)\n" + ) + # fmt: on assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff) @staticmethod