Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add detail to get_tree_diff #310

Merged
merged 3 commits into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.22.0] - 2024-11-03
### Added:
- Tree Helper: Accept parameter `detail` to show the different types of shift e.g., moved / added / removed. By default it is false.

## [0.21.3] - 2024-10-16
### Added:
- Tree Node: Docstring indentation and additional information for Node creation.
Expand Down Expand Up @@ -668,7 +672,8 @@ ignore null attribute columns.
- Utility Iterator: Tree traversal methods.
- Workflow To Do App: Tree use case with to-do list implementation.

[Unreleased]: https://github.com/kayjan/bigtree/compare/0.21.3...HEAD
[Unreleased]: https://github.com/kayjan/bigtree/compare/0.22.0...HEAD
[0.22.0]: https://github.com/kayjan/bigtree/compare/0.21.3...0.22.0
[0.21.3]: https://github.com/kayjan/bigtree/compare/0.21.2...0.21.3
[0.21.2]: https://github.com/kayjan/bigtree/compare/0.21.1...0.21.2
[0.21.1]: https://github.com/kayjan/bigtree/compare/0.21.0...0.21.1
Expand Down
2 changes: 1 addition & 1 deletion bigtree/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.21.3"
__version__ = "0.22.0"

from bigtree.binarytree.construct import list_to_binarytree
from bigtree.dag.construct import dataframe_to_dag, dict_to_dag, list_to_dag
Expand Down
52 changes: 48 additions & 4 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ def get_tree_diff(
tree: node.Node,
other_tree: node.Node,
only_diff: bool = True,
detail: bool = False,
attr_list: List[str] = [],
fallback_sep: str = "/",
) -> node.Node:
Expand All @@ -255,6 +256,9 @@ def get_tree_diff(
- For example: (+) refers to nodes that are in `other_tree` but not `tree`.
- For example: (-) refers to nodes that are in `tree` but not `other_tree`.

If `detail=True`, (added) and (moved to) will be used instead of (+), (removed) and (moved from)
will be used instead of (-).

!!! note

- tree and other_tree must have the same `sep` symbol, otherwise this will raise ValueError
Expand Down Expand Up @@ -298,6 +302,15 @@ def get_tree_diff(
├── file1.doc
└── photo2.jpg (-)

>>> tree_diff = get_tree_diff(root, root_other, detail=True)
>>> tree_diff.show()
Downloads
├── Pictures
│ ├── photo1.jpg
│ └── photo2.jpg (moved to)
├── file1.doc
└── photo2.jpg (moved from)

Comparing tree attributes

- (~) will be added to node name if there are differences in tree attributes defined in `attr_list`.
Expand Down Expand Up @@ -339,6 +352,7 @@ def get_tree_diff(
tree (Node): tree to be compared against
other_tree (Node): tree to be compared with
only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True
detail (bool): indicator to differentiate between different types of diff e.g., added or removed or moved, defaults to False
attr_list (List[str]): tree attributes to check for difference, defaults to empty list
fallback_sep (str): sep to fall back to if tree and other_tree have a sep that clashes with symbols "+" / "-" / "~".
All node names in tree and other_tree should not contain this fallback_sep, defaults to "/"
Expand Down Expand Up @@ -388,13 +402,43 @@ def get_tree_diff(
nodes_added = list(data_both[data_both[indicator_col] == "right_only"][path_col])[
::-1
]
for node_removed in nodes_removed:

moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
if detail:
_sep = tree.sep
node_names_removed = [
node_removed.split(_sep)[-1] for node_removed in nodes_removed
]
node_names_added = [node_added.split(_sep)[-1] for node_added in nodes_added]
moved_from_indicator = [
node_name_removed in node_names_added
for node_name_removed in node_names_removed
]
moved_to_indicator = [
node_name_added in node_names_removed
for node_name_added in node_names_added
]

for node_removed, move_indicator in zip(nodes_removed, moved_from_indicator):
if not detail:
suffix = "-"
elif move_indicator:
suffix = "moved from"
else:
suffix = "removed"
data_both[path_col] = data_both[path_col].str.replace(
node_removed, f"{node_removed} (-)", regex=True
node_removed, f"{node_removed} ({suffix})", regex=True
)
for node_added in nodes_added:
for node_added, move_indicator in zip(nodes_added, moved_to_indicator):
if not detail:
suffix = "+"
elif move_indicator:
suffix = "moved to"
else:
suffix = "added"
data_both[path_col] = data_both[path_col].str.replace(
node_added, f"{node_added} (+)", regex=True
node_added, f"{node_added} ({suffix})", regex=True
)

# Check tree attribute difference
Expand Down
37 changes: 36 additions & 1 deletion docs/gettingstarted/demo/tree.md
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,11 @@ To compare tree attributes:
- `(-)`: Node is removed in second tree
- `(~)`: Node has different attributes, only available when comparing attributes

For more detail, `(moved from)`, `(moved to)`, `(added)`, and `(removed)` can
be shown instead of `(+)` and `(-)` by passing `detail=True`.

=== "Only differences"
```python hl_lines="20 29"
```python hl_lines="20"
from bigtree import str_to_tree, get_tree_diff

root = str_to_tree("""
Expand Down Expand Up @@ -1015,6 +1018,38 @@ To compare tree attributes:
# ├── f (-)
# └── g (+)
```
=== "With details"
```python hl_lines="21"
from bigtree import str_to_tree, get_tree_diff

root = str_to_tree("""
a
├── b
│ ├── d
│ └── e
└── c
└── f
""")

root_other = str_to_tree("""
a
├── b
│ └── g
└── c
├── d
└── f
""")

tree_diff = get_tree_diff(root, root_other, detail=True)
tree_diff.show()
# a
# ├── b
# │ ├── d (moved from)
# │ ├── e (removed)
# │ └── g (added)
# └── c
# └── d (moved to)
```
=== "Attribute difference"
```python hl_lines="25"
from bigtree import Node, get_tree_diff
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ dependencies = [
]

[tool.hatch.envs.default.scripts]
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=bigtree {args} --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=bigtree {args}"
# Benchmark flags are passed as arguments to the `cov` script; chaining them with `&&` would make the shell try to run `--benchmark-autosave` as its own command.
cov-benchmark = "cov --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
no-cov = "test && coverage report --show-missing --omit='*/workflows/*' --benchmark-autosave --benchmark-histogram=.benchmarks/histogram --benchmark-json .benchmarks/output.json"
test = "pytest . {args}"
lint = "black -l 88 ."
Expand Down
5 changes: 5 additions & 0 deletions tests/tree/test_export.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import unittest

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -1140,6 +1141,10 @@ def test_tree_to_polars_name_col_missing(tree_node):
assert expected.equals(actual)

@staticmethod
@unittest.skipIf(
tuple(map(int, pl.__version__.split(".")[:2])) > (1, 9),
reason="Not compatible with polars>1.9.0",
)
def test_tree_to_polars_name_path_col_missing(tree_node):
expected = pl.DataFrame()
expected.index = range(8)
Expand Down
109 changes: 108 additions & 1 deletion tests/tree/test_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from bigtree.node import basenode, node
from bigtree.tree import export, helper
from bigtree.tree import export, helper, modify
from bigtree.utils import exceptions
from tests.conftest import assert_print_statement
from tests.node.test_basenode import (
Expand Down Expand Up @@ -288,6 +288,22 @@ def test_tree_diff_forbidden_sep(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_detail(tree_node):
# Checks get_tree_diff with detail=True and the default only_diff: per the
# expected output below, nodes missing from the other tree are suffixed
# "(removed)" and the extra node is suffixed "(added)"; no "(-)" / "(+)" appear.
# Build the comparison tree from the "a/c" path of the fixture
# (presumably prune_tree keeps only that branch; verify against helper.prune_tree).
other_tree_node = helper.prune_tree(tree_node, "a/c")
# Attach a new node "i" whose parent is the pruned tree's returned node.
_ = node.Node("i", parent=other_tree_node)
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node, detail=True)
# Only differing nodes are expected; the unchanged "c" branch is absent.
expected_str = (
"a\n"
"├── b (removed)\n"
"│ ├── d (removed)\n"
"│ └── e (removed)\n"
"│ ├── g (removed)\n"
"│ └── h (removed)\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_all_diff(tree_node):
other_tree_node = helper.prune_tree(tree_node, "a/c")
Expand All @@ -306,6 +322,50 @@ def test_tree_diff_all_diff(tree_node):
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

@staticmethod
def test_tree_diff_all_diff_detail(tree_node):
# Checks get_tree_diff with only_diff=False and detail=True: the expected
# output below lists every node, with "(removed)" / "(added)" suffixes on the
# differing ones and no suffix on the unchanged "c" / "f" nodes.
# Build the comparison tree from the "a/c" path of the fixture
# (presumably prune_tree keeps only that branch; verify against helper.prune_tree).
other_tree_node = helper.prune_tree(tree_node, "a/c")
# Attach a new node "i" whose parent is the pruned tree's returned node.
_ = node.Node("i", parent=other_tree_node)
tree_diff = helper.get_tree_diff(
tree_node, other_tree_node, only_diff=False, detail=True
)
expected_str = (
"a\n"
"├── b (removed)\n"
"│ ├── d (removed)\n"
"│ └── e (removed)\n"
"│ ├── g (removed)\n"
"│ └── h (removed)\n"
"├── c\n"
"│ └── f\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

@staticmethod
def test_tree_diff_detail_move(tree_node):
# Checks that detail=True distinguishes moved nodes from added/removed ones:
# per the expected output below, relocated nodes are suffixed "(moved from)"
# at the old location and "(moved to)" at the new one.
other_tree_node = tree_node.copy()
# Two shifts on the copy: "a/b/d" is paired with a None destination and
# "a/b" is relocated to "a/c/b". The expected output shows "d" as "(removed)",
# so a None destination presumably deletes the node; confirm in modify.shift_nodes.
modify.shift_nodes(
other_tree_node, from_paths=["a/b/d", "a/b"], to_paths=[None, "a/c/b"]
)
# Attach a brand-new node "i", expected to be reported as "(added)".
_ = node.Node("i", parent=other_tree_node)
tree_only_diff = helper.get_tree_diff(tree_node, other_tree_node, detail=True)
# Descendants of the relocated "b" (e, g, h) are also expected to carry the
# moved-from / moved-to suffixes, since their full paths changed as well.
expected_str = (
"a\n"
"├── b (moved from)\n"
"│ ├── d (removed)\n"
"│ └── e (moved from)\n"
"│ ├── g (moved from)\n"
"│ └── h (moved from)\n"
"├── c\n"
"│ └── b (moved to)\n"
"│ └── e (moved to)\n"
"│ ├── g (moved to)\n"
"│ └── h (moved to)\n"
"└── i (added)\n"
)
assert_print_statement(export.print_tree, expected_str, tree=tree_only_diff)

@staticmethod
def test_tree_diff_new_leaf(tree_node):
other_tree_node = tree_node.copy()
Expand Down Expand Up @@ -575,6 +635,53 @@ def test_tree_diff_attributes_different_structure_different_attributes_all_diff(
actual = export.tree_to_dict(tree_diff, all_attrs=True)
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"

@staticmethod
def test_tree_diff_attributes_different_structure_different_attributes_all_diff_detail(
tree_node,
):
# Checks detail=True together with only_diff=False when the other tree differs
# both in structure (node "d" detached) and in attributes ("age" of "c" and "f").
from bigtree import find_name

tree_node_copy = tree_node.copy()
# Structural change: detach "d" from the copy by clearing its parent.
for node_name_to_remove in ["d"]:
node_to_remove = find_name(tree_node_copy, node_name_to_remove)
node_to_remove.parent = None
# Attribute change: increase the age of "c" and "f" by 10 in the copy.
for node_name_to_change in ["c", "f"]:
node_to_change = find_name(tree_node_copy, node_name_to_change)
node_to_change.age += 10

# Without attributes
# No attr_list is passed, so only the structural difference is expected to be
# flagged: "d" is suffixed "(removed)" and every other node is unsuffixed.
expected_str = (
"a\n"
"├── b\n"
"│ ├── d (removed)\n"
"│ └── e\n"
"│ ├── g\n"
"│ └── h\n"
"└── c\n"
" └── f\n"
)
tree_diff = helper.get_tree_diff(
tree_node, tree_node_copy, only_diff=False, detail=True
)
assert_print_statement(export.print_tree, expected_str, tree=tree_diff)

# With attributes
# With attr_list=["age"], nodes whose age differs are expected to be suffixed
# "(~)" and to carry an "age" tuple of the two values; "d" keeps "(removed)".
expected = {
"/a": {"name": "a"},
"/a/b": {"name": "b"},
"/a/b/d (removed)": {"name": "d (removed)"},
"/a/b/e": {"name": "e"},
"/a/b/e/g": {"name": "g"},
"/a/b/e/h": {"name": "h"},
"/a/c (~)": {"age": (60, 70.0), "name": "c (~)"},
"/a/c (~)/f (~)": {"age": (38, 48.0), "name": "f (~)"},
}
tree_diff = helper.get_tree_diff(
tree_node, tree_node_copy, only_diff=False, detail=True, attr_list=["age"]
)
actual = export.tree_to_dict(tree_diff, all_attrs=True)
assert actual == expected, f"Expected\n{expected}\nReceived\n{actual}"

@staticmethod
def test_tree_diff_attributes_invalid_attribute(tree_node):
from bigtree import find_name
Expand Down
Loading