Skip to content

Commit

Permalink
Merge pull request #318 from kayjan/bugfix/tree-diff-substring
Browse files Browse the repository at this point in the history
Fix get_tree_diff if path are substring of a modified path
  • Loading branch information
kayjan authored Nov 3, 2024
2 parents 502290d + 3841978 commit ffb365f
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 12 deletions.
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed:
- Misc: Doctest for docstrings, docstring to indicate usage prefers `node_name` to `name`.
- Tree Export: Mermaid diagram title to add newline.
- Tree Helper: Get tree diff string replacement bug when the path change is substring of another path.

## [0.22.1] - 2024-11-03
### Added:
Expand All @@ -21,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.22.0] - 2024-11-03
### Added:
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default it is false.
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default
it is false.

## [0.21.3] - 2024-10-16
### Added:
Expand Down Expand Up @@ -96,10 +98,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.19.0] - 2024-06-15
### Changed:
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more
object-oriented way of parsing arguments.
This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`.
The argument `custom_style` is deprecated, and argument `style` is used instead.
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more object-oriented
way of parsing arguments. This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`. The
argument `custom_style` is deprecated, and argument `style` is used instead.
**This might not be backwards-compatible!**
- Misc: Update docstrings to be more comprehensive for tree constructor and tree exporter.
- Misc: Update documentation badges and conda information.
Expand Down
51 changes: 44 additions & 7 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
from bigtree.tree import construct, export, search
from bigtree.utils import assertions, exceptions, iterators

try:
import pandas as pd
except ImportError: # pragma: no cover
from unittest.mock import MagicMock

pd = MagicMock()

__all__ = ["clone_tree", "get_subtree", "prune_tree", "get_tree_diff"]
BaseNodeT = TypeVar("BaseNodeT", bound=basenode.BaseNode)
BinaryNodeT = TypeVar("BinaryNodeT", bound=binarynode.BinaryNode)
Expand Down Expand Up @@ -237,6 +244,7 @@ def prune_tree(
return tree_copy


@exceptions.optional_dependencies_pandas
def get_tree_diff(
tree: node.Node,
other_tree: node.Node,
Expand Down Expand Up @@ -376,6 +384,7 @@ def get_tree_diff(
name_col = "name"
path_col = "PATH"
indicator_col = "Exists"
tree_sep = tree.sep

data, data_other = (
export.tree_to_dataframe(
Expand Down Expand Up @@ -406,11 +415,12 @@ def get_tree_diff(
moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
if detail:
_sep = tree.sep
node_names_removed = [
node_removed.split(_sep)[-1] for node_removed in nodes_removed
node_removed.split(tree_sep)[-1] for node_removed in nodes_removed
]
node_names_added = [
node_added.split(tree_sep)[-1] for node_added in nodes_added
]
node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added]
moved_from_indicator = [
node_name_removed in node_names_added
for node_name_removed in node_names_removed
Expand All @@ -420,15 +430,39 @@ def get_tree_diff(
for node_name_added in node_names_added
]

def add_suffix_to_path(
_data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
) -> pd.DataFrame:
"""Add suffix to path string
Args:
_data (pd.DataFrame): original data with path column
_condition (pd.Series): whether to add suffix, contains True/False values
_original_name (str): path prefix to add suffix to
_suffix (str): suffix to add to path column
Returns:
(pd.DataFrame)
"""
data_replace = _data[_condition]
data_replace[path_col] = data_replace[path_col].str.replace(
_original_name, f"{_original_name} ({suffix})", regex=True
)
data_not_replace = _data[~_condition]
return data_replace._append(data_not_replace).sort_index()

for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator):
if not detail:
suffix = "-"
elif move_indicator:
suffix = "moved from"
else:
suffix = "removed"
data_both[path_col] = data_both[path_col].str.replace(
node_removed, f"{node_removed} ({suffix})", regex=True
condition_node_removed = data_both[path_col].str.endswith(
node_removed
) | data_both[path_col].str.contains(node_removed + tree_sep)
data_both = add_suffix_to_path(
data_both, condition_node_removed, node_removed, suffix
)
for node_added, move_indicator in zip(nodes_added, moved_to_indicator):
if not detail:
Expand All @@ -437,8 +471,11 @@ def get_tree_diff(
suffix = "moved to"
else:
suffix = "added"
data_both[path_col] = data_both[path_col].str.replace(
node_added, f"{node_added} ({suffix})", regex=True
condition_node_added = data_both[path_col].str.endswith(node_added) | data_both[
path_col
].str.contains(node_added + tree_sep)
data_both = add_suffix_to_path(
data_both, condition_node_added, node_added, suffix
)

# Check tree attribute difference
Expand Down
17 changes: 17 additions & 0 deletions tests/tree/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,23 @@ def test_tree_diff(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_same_prefix():
tree_node = node.Node(
"a", children=[node.Node("bb", children=[node.Node("b")])]
)
other_tree_node = node.Node("a", children=[node.Node("b")])
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node)
# fmt: off
expected_str = (
"a\n"
"├── b (+)\n"
"└── bb (-)\n"
" └── b (-)\n"
)
# fmt: on
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_diff_sep_error(tree_node):
other_tree_node = helper.prune_tree(tree_node, "a/c")
Expand Down

0 comments on commit ffb365f

Please sign in to comment.