Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix get_tree_diff if path are substring of a modified path #318

Merged
merged 3 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed:
- Misc: Doctest for docstrings, docstring to indicate usage prefers `node_name` to `name`.
- Tree Export: Mermaid diagram title to add newline.
- Tree Helper: Get tree diff string replacement bug when the path change is substring of another path.

## [0.22.1] - 2024-11-03
### Added:
Expand All @@ -21,7 +22,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.22.0] - 2024-11-03
### Added:
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default it is false.
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default
it is false.

## [0.21.3] - 2024-10-16
### Added:
Expand Down Expand Up @@ -96,10 +98,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [0.19.0] - 2024-06-15
### Changed:
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more
object-oriented way of parsing arguments.
This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`.
The argument `custom_style` is deprecated, and argument `style` is used instead.
- Tree Exporter: Print functions to accept custom style that is implemented as dataclass, this is a more object-oriented
way of parsing arguments. This affects functions `print_tree`, `yield_tree`, `hprint_tree`, and `hyield_tree`. The
argument `custom_style` is deprecated, and argument `style` is used instead.
**This might not be backwards-compatible!**
- Misc: Update docstrings to be more comprehensive for tree constructor and tree exporter.
- Misc: Update documentation badges and conda information.
Expand Down
51 changes: 44 additions & 7 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
from bigtree.tree import construct, export, search
from bigtree.utils import assertions, exceptions, iterators

try:
import pandas as pd
except ImportError: # pragma: no cover
from unittest.mock import MagicMock

pd = MagicMock()

__all__ = ["clone_tree", "get_subtree", "prune_tree", "get_tree_diff"]
BaseNodeT = TypeVar("BaseNodeT", bound=basenode.BaseNode)
BinaryNodeT = TypeVar("BinaryNodeT", bound=binarynode.BinaryNode)
Expand Down Expand Up @@ -237,6 +244,7 @@ def prune_tree(
return tree_copy


@exceptions.optional_dependencies_pandas
def get_tree_diff(
tree: node.Node,
other_tree: node.Node,
Expand Down Expand Up @@ -376,6 +384,7 @@ def get_tree_diff(
name_col = "name"
path_col = "PATH"
indicator_col = "Exists"
tree_sep = tree.sep

data, data_other = (
export.tree_to_dataframe(
Expand Down Expand Up @@ -406,11 +415,12 @@ def get_tree_diff(
moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
if detail:
_sep = tree.sep
node_names_removed = [
node_removed.split(_sep)[-1] for node_removed in nodes_removed
node_removed.split(tree_sep)[-1] for node_removed in nodes_removed
]
node_names_added = [
node_added.split(tree_sep)[-1] for node_added in nodes_added
]
node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added]
moved_from_indicator = [
node_name_removed in node_names_added
for node_name_removed in node_names_removed
Expand All @@ -420,15 +430,39 @@ def get_tree_diff(
for node_name_added in node_names_added
]

def add_suffix_to_path(
_data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
) -> pd.DataFrame:
"""Add suffix to path string

Args:
_data (pd.DataFrame): original data with path column
_condition (pd.Series): whether to add suffix, contains True/False values
_original_name (str): path prefix to add suffix to
_suffix (str): suffix to add to path column

Returns:
(pd.DataFrame)
"""
data_replace = _data[_condition]
data_replace[path_col] = data_replace[path_col].str.replace(
_original_name, f"{_original_name} ({suffix})", regex=True
)
data_not_replace = _data[~_condition]
return data_replace._append(data_not_replace).sort_index()

for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator):
if not detail:
suffix = "-"
elif move_indicator:
suffix = "moved from"
else:
suffix = "removed"
data_both[path_col] = data_both[path_col].str.replace(
node_removed, f"{node_removed} ({suffix})", regex=True
condition_node_removed = data_both[path_col].str.endswith(
node_removed
) | data_both[path_col].str.contains(node_removed + tree_sep)
data_both = add_suffix_to_path(
data_both, condition_node_removed, node_removed, suffix
)
for node_added, move_indicator in zip(nodes_added, moved_to_indicator):
if not detail:
Expand All @@ -437,8 +471,11 @@ def get_tree_diff(
suffix = "moved to"
else:
suffix = "added"
data_both[path_col] = data_both[path_col].str.replace(
node_added, f"{node_added} ({suffix})", regex=True
condition_node_added = data_both[path_col].str.endswith(node_added) | data_both[
path_col
].str.contains(node_added + tree_sep)
data_both = add_suffix_to_path(
data_both, condition_node_added, node_added, suffix
)

# Check tree attribute difference
Expand Down
17 changes: 17 additions & 0 deletions tests/tree/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,23 @@ def test_tree_diff(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_same_prefix():
tree_node = node.Node(
"a", children=[node.Node("bb", children=[node.Node("b")])]
)
other_tree_node = node.Node("a", children=[node.Node("b")])
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node)
# fmt: off
expected_str = (
"a\n"
"├── b (+)\n"
"└── bb (-)\n"
" └── b (-)\n"
)
# fmt: on
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_diff_sep_error(tree_node):
other_tree_node = helper.prune_tree(tree_node, "a/c")
Expand Down
Loading