Skip to content

Commit

Permalink
feat: check for moved indicator via dataframe operations
Browse files Browse the repository at this point in the history
  • Loading branch information
kayjan committed Nov 13, 2024
1 parent b667b02 commit 8d866c5
Showing 1 changed file with 38 additions and 32 deletions.
70 changes: 38 additions & 32 deletions bigtree/tree/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def get_tree_diff(
indicator_col = "Exists"
old_suffix = "_old"
new_suffix = "_new"
tree_sep = tree.sep
moved_ind = "moved_ind"

data, data_other = (
export.tree_to_dataframe(
Expand Down Expand Up @@ -475,32 +475,38 @@ def get_tree_diff(
data_path_diff = data_compare

# Handle tree structure difference
paths_removed = list(
data_path_diff[data_path_diff[indicator_col] == "left_only"][path_col]
)[::-1]
paths_added = list(
data_path_diff[data_path_diff[indicator_col] == "right_only"][path_col]
)[::-1]

moved_from_ind: List[bool] = [True for _ in range(len(paths_removed))]
moved_to_ind: List[bool] = [True for _ in range(len(paths_added))]
if detail:
names_removed = [path.split(tree_sep)[-1] for path in paths_removed]
names_added = [path.split(tree_sep)[-1] for path in paths_added]
moved_from_ind = [name in names_added for name in names_removed]
moved_to_ind = [name in names_removed for name in names_added]

path_removed_to_suffix = {
path: "-" if not detail else ("moved from" if move_ind else "removed")
for path, move_ind in zip(paths_removed, moved_from_ind)
data_tree = data_path_diff[data_path_diff[indicator_col] == "left_only"]
data_tree_other = data_path_diff[data_path_diff[indicator_col] == "right_only"]
data_tree[moved_ind] = False
data_tree_other[moved_ind] = False

if len(data_tree) and len(data_tree_other):
# Check for moved from and moved to
move_from_condition = data_tree[
data_tree[name_col].isin(set(data_tree_other[name_col]))
]
data_tree.loc[move_from_condition.index, moved_ind] = True
move_to_condition = data_tree_other[
data_tree_other[name_col].isin(set(data_tree[name_col]))
]
data_tree_other.loc[move_to_condition.index, moved_ind] = True

path_move_from = data_tree.set_index(path_col)[[moved_ind]].to_dict(orient="index")
path_move_to = data_tree_other.set_index(path_col)[[moved_ind]].to_dict(
orient="index"
)

path_move_from_suffix = {
path: "-" if not detail else ("moved from" if v[moved_ind] else "removed")
for path, v in path_move_from.items()
}
path_added_to_suffix = {
path: "+" if not detail else ("moved to" if move_ind else "added")
for path, move_ind in zip(paths_added, moved_to_ind)
path_move_to_suffix = {
path: "+" if not detail else ("moved to" if v[moved_ind] else "added")
for path, v in path_move_to.items()
}

# Check tree attribute difference
dict_attr_diff: Dict[str, Dict[str, Any]] = {}
path_attr_diff: Dict[str, Dict[str, Any]] = {}
if attr_list:
data_both = data_compare[data_compare[indicator_col] == "both"]
condition_attr_diff = (
Expand All @@ -517,7 +523,7 @@ def get_tree_diff(
data_attr_diff = data_both[eval(condition_attr_diff)]
dict_attr_all = data_attr_diff.set_index(path_col).to_dict(orient="index")
for path, node_attr in dict_attr_all.items():
dict_attr_diff[path] = {
path_attr_diff[path] = {
attr: (
node_attr[f"{attr}{old_suffix}"],
node_attr[f"{attr}{new_suffix}"],
Expand All @@ -531,24 +537,24 @@ def get_tree_diff(
if only_diff:
data_compare = data_compare[
(data_compare[indicator_col] != "both")
| (data_compare[path_col].isin(dict_attr_diff.keys()))
| (data_compare[path_col].isin(path_attr_diff.keys()))
]
data_compare = data_compare[[path_col]].sort_values(path_col)
if len(data_compare):
tree_diff = construct.dataframe_to_tree(
data_compare, node_type=tree.__class__, sep=tree.sep
)
for path in sorted(path_removed_to_suffix, reverse=True):
for path in sorted(path_move_from_suffix, reverse=True):
_node = search.find_full_path(tree_diff, path)
_node.name += f""" ({path_removed_to_suffix[path]})"""
for path in sorted(path_added_to_suffix, reverse=True):
_node.name += f""" ({path_move_from_suffix[path]})"""
for path in sorted(path_move_to_suffix, reverse=True):
_node = search.find_full_path(tree_diff, path)
_node.name += f""" ({path_added_to_suffix[path]})"""
_node.name += f""" ({path_move_to_suffix[path]})"""

# Handle tree attribute difference
if dict_attr_diff:
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, dict_attr_diff)
for path in sorted(dict_attr_diff, reverse=True):
if path_attr_diff:
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, path_attr_diff)
for path in sorted(path_attr_diff, reverse=True):
_node = search.find_full_path(tree_diff, path)
_node.name += " (~)"
return tree_diff

0 comments on commit 8d866c5

Please sign in to comment.