Skip to content

Commit

Permalink
List conflicts - label + classify conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
olsen232 committed May 17, 2020
1 parent 1fba96d commit 5745e45
Show file tree
Hide file tree
Showing 4 changed files with 366 additions and 80 deletions.
341 changes: 278 additions & 63 deletions sno/conflicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import click
import pygit2

from .exceptions import InvalidOperation
from .exceptions import InvalidOperation, NotYetImplemented
from .repo_files import (
ORIG_HEAD,
MERGE_HEAD,
Expand All @@ -20,6 +20,7 @@
repo_file_exists,
)
from .structs import AncestorOursTheirs
from .structure import RepositoryStructure


L = logging.getLogger("sno.conflicts")
Expand Down Expand Up @@ -171,58 +172,264 @@ def _ensure_conflict(self, conflict):
)


def summarise_conflicts_json(repo, conflict_index):
# Shortcut used often below
def aot(generator_or_tuple):
return AncestorOursTheirs(*generator_or_tuple)
class ConflictOutputFormat:
    """
    Enumerates the ways the conflicts resulting from a merge can be shown.

    The values are plain ints. At present only SHORT_SUMMARY is in use
    (by `sno merge`).
    """

    # --- Summary formats ---
    # Show counts of types of conflicts.
    SHORT_SUMMARY = 0
    # List all the features that conflicted.
    SUMMARY = 1

    # --- Full diff formats ---
    # Show all versions of all the features that conflicted, in...
    # ... text format.
    FULL_TEXT_DIFF = 2
    # ... JSON format.
    FULL_JSON_DIFF = 3
    # ... GEOJSON format.
    FULL_GEOJSON_DIFF = 4

    # The subset of formats that summarise conflicts rather than diffing them.
    _SUMMARY_FORMATS = (SHORT_SUMMARY, SUMMARY)


def list_conflicts(
repo, conflict_index, output_format, *, ancestor, ours, theirs, flat=False
):
"""
Lists all the conflicts in conflict_index, categorised into nested dicts.
Example:
{
"table_A": {
"featureConflicts": {
"edit/edit": {
"table_A:fid=5": {"ancestor": "...", "ours": ..., "theirs": ...},
"table_A:fid=11": {"ancestor": "...", "ours": ..., "theirs": ...},
},
"add/add": {...}
},
"metaConflicts": {
"edit/edit": {
"table_A:meta:gpkg_spatial_ref_sys": {"ancestor": ..., "ours": ..., "theirs": ...}
}
}
},
"table_B": {...}
}
Depending on the output_format, the conflicts themselves could be summarised as counts
or as lists of names, eg ["table_1:fid=5", "table_1:fid=11"]
repo - the pygit2.Repository.
conflict_index - the ConflictIndex containing the conflicts found.
output_format - one of the constants in ConflictOutputFormat.
ancestor, ours, theirs - CommitWithReference objects.
flat - if True, don't categorise conflicts. Put them all at the top level.
"""
if output_format not in ConflictOutputFormat._SUMMARY_FORMATS:
raise NotYetImplemented(
"Sorry, Only SUMMARY and SHORT_SUMMARY are supported at present"
)

repo_structure3 = AncestorOursTheirs(
RepositoryStructure(repo, ancestor.commit),
RepositoryStructure(repo, ours.commit),
RepositoryStructure(repo, theirs.commit),
)
conflicts = {}

for key, conflict3 in conflict_index.conflicts.items():
paths3 = aot(c.path if c else None for c in conflict3)
if not any(paths3):
# Shouldn't happen
raise RuntimeError("Conflict has no paths")

# Paths look like this: # path/to/table/.sno-table/feature_path for features
# Or path/to/table/.sno-table/meta/... for metadata
# (This will need updating if newer dataset versions don't follow this pattern.)
tables3 = aot(p.split("/.sno-table/", 1)[0] if p else None for p in paths3)
actual_tables = [t for t in tables3 if t]
all_same_table = all(a == actual_tables[0] for a in actual_tables)
if not all_same_table:
# This is a really bad conflict - it seems to involve multiple tables.
# Perhaps features were moved from one table to another, or perhaps
# a table was renamed.
conflicts.setdefault("<other>", 0)
conflicts["<other>"] += 1
continue

table = actual_tables[0]
conflicts.setdefault(table, {})
conflicts_table = conflicts[table]

meta_change = any("/.sno-table/meta/" in (p or "") for p in paths3)
if meta_change:
conflicts_table.setdefault("metaConflicts", 0)
conflicts_table["metaConflicts"] += 1
continue

conflicts_table.setdefault("featureConflicts", {})
feature_conflicts = conflicts_table["featureConflicts"]

all_same_path = all((p == paths3[0] for p in paths3))
if all_same_path:
feature_conflicts.setdefault("edit/edit", 0)
feature_conflicts["edit/edit"] += 1
continue

feature_conflicts.setdefault("other", 0)
feature_conflicts["other"] += 1
decoded_path3 = decode_conflict_paths(conflict3, repo_structure3)
conflict_dict = get_conflict_as_dict(
conflict3, repo_structure3, decoded_path3, output_format
)
if flat:
conflicts.update(conflict_dict)
else:
conflict_category = get_conflict_category(decoded_path3)
add_conflict_dict_to_category(conflicts, conflict_category, conflict_dict)

if output_format in ConflictOutputFormat._SUMMARY_FORMATS:
conflicts = summarise_conflicts(conflicts, output_format)

return conflicts


def decode_conflict_paths(conflict3, repo_structure3):
    """
    Decode the path of each version of a conflict.

    conflict3 - 3 versions (ancestor, ours, theirs) of an IndexEntry; a version
        that is falsy (eg None) is passed through as None.
    repo_structure3 - the 3 matching versions of the repository structure.

    Returns an AncestorOursTheirs holding, for each version, the result of
    calling decode_path on that version's repository structure with that
    version's entry path - see RepositoryStructure.decode_path.
    """
    decoded = []
    for entry, structure in zip(conflict3, repo_structure3):
        if entry:
            decoded.append(structure.decode_path(entry.path))
        else:
            decoded.append(None)
    return AncestorOursTheirs(*decoded)


def get_conflict_category(decoded_path3):
    """
    Categorise a conflict from its 3 decoded paths, so that similar conflicts
    can be grouped together.

    decoded_path3 - ancestor / ours / theirs decoded paths (each may be None),
        accessible both by iteration and by the attributes
        .ancestor, .ours and .theirs.

    Returns a list of nested category names, for example:
        ["table_A", "featureConflicts", "edit/edit"]
    meaning conflicting edits were made to a feature in table_A.
    Conflicts that span more than one table, or more than one table part,
    are cut short with "<uncategorised>".
    """
    present = [dp for dp in decoded_path3 if dp]

    # Element 0 of a decoded path is the table.
    if len({dp[0] for dp in present}) != 1:
        return ["<uncategorised>"]
    table = present[0][0]

    # Element 1 of a decoded path is the table part (eg "feature", "meta").
    if len({dp[1] for dp in present}) != 1:
        # Meta/feature conflict. Shouldn't really happen.
        return [table, "<uncategorised>"]
    tablepart = present[0][1] + "Conflicts"

    has_ancestor = bool(decoded_path3.ancestor)
    has_ours = bool(decoded_path3.ours)
    has_theirs = bool(decoded_path3.theirs)

    # <uncategorised> type currently includes everything involving renames.
    conflict_type = "<uncategorised>"
    if len(set(present)) == 1:
        if has_ours and has_theirs:
            conflict_type = "edit/edit" if has_ancestor else "add/add"
        elif has_ancestor and (has_ours or has_theirs):
            conflict_type = "edit/delete"

    return [table, tablepart, conflict_type]


# Sentinel that stands in for a conflict's content when the conflict is going
# to be summarised anyway (as a count or a list of labels) - this helps code
# re-use between summary and full-diff output modes.
_CONFLICT_PLACEHOLDER = object()


def get_conflict_as_dict(conflict3, repo_structure3, decoded_path3, output_format):
    """
    Represent a single conflict as a one-entry dict, according to output_format.

    conflict3 - 3 versions of an IndexEntry (not used yet).
    repo_structure3 - 3 versions of the repository structure (not used yet).
    decoded_path3 - 3 versions of the decoded path; used to build the label.
    output_format - one of the constants in ConflictOutputFormat.

    The single key of the returned dict is a unique name for the conflict.
    For example:
        {"table_A:fid=5": {"ancestor": ..., "ours": ..., "theirs": ...}}
    For summary formats the value is just _CONFLICT_PLACEHOLDER, since the
    conflict itself will be summarised away.

    Raises NotYetImplemented for any non-summary output format.
    """
    label = get_conflict_label(decoded_path3)

    if output_format not in ConflictOutputFormat._SUMMARY_FORMATS:
        # TODO - return {label: {"ancestor": ..., "ours": ..., "theirs": ...}}
        raise NotYetImplemented("Output of full conflict diffs is not supported")

    # No need to output info about conflict itself - it will be summarised -
    # so a placeholder is enough.
    return {label: _CONFLICT_PLACEHOLDER}


def get_conflict_label(decoded_path3):
    """
    Build a unique name for a conflict from the 3 versions of its decoded path.

    In simple cases, where every version that exists has the same path,
    this is something like: "table_A:fid=5"
    If the versions have different paths (eg renames have occurred), each
    existing version is named individually, eg:
        "ancestor=table_A:fid=5 ours=table_A:fid=6 theirs=table_A:fid=12"
    """
    present = [dp for dp in decoded_path3 if dp]
    if len(set(present)) == 1:
        return decoded_path_to_label(present[0])

    # Multiple distinct paths: prefix each existing version with its name.
    named_parts = []
    for version_name, dp in zip(AncestorOursTheirs.NAMES, decoded_path3):
        if dp:
            named_parts.append(f"{version_name}={decoded_path_to_label(dp)}")
    return " ".join(named_parts)


def decoded_path_to_label(decoded_path):
    """
    Converts a decoded path to a unique name, eg:
        ("table_A", "feature", "fid", 5) -> "table_A:fid=5"
        ("table_A", "meta", "gpkg_spatial_ref_sys") -> "table_A:meta:gpkg_spatial_ref_sys"

    decoded_path - a sequence whose second element is the table part
        (eg "feature"), or None.

    Returns the label as a string, or None if decoded_path is None.
    """
    if decoded_path is None:
        return None

    if decoded_path[1] == "feature":
        table, _tablepart, pk_field, pk = decoded_path
        return f"{table}:{pk_field}={pk}"

    # Stringify each component so that a non-string part (eg a number)
    # cannot make str.join raise TypeError.
    return ":".join(str(part) for part in decoded_path)


def add_conflict_dict_to_category(root_dict, conflict_category, conflict_dict):
    """
    Merge conflict_dict into the nested dict found by following
    conflict_category (a sequence of keys) down from root_dict, creating
    any missing intermediate dicts on the way. Modifies root_dict in place.
    """
    target = root_dict
    for category_name in conflict_category:
        # setdefault returns the existing sub-dict, or the new one it inserted.
        target = target.setdefault(category_name, {})

    target.update(conflict_dict)


def summarise_conflicts(cur_dict, output_format):
    """
    Recursively traverses the tree of categorised conflicts,
    looking for a dict where the values are placeholders.
    For example:
        {
            K1: _CONFLICT_PLACEHOLDER,
            K2: _CONFLICT_PLACEHOLDER,
        }
    When found, it will be replaced with one of the following:
    1) SHORT_SUMMARY: 2 (the size of the dict)
    2) SUMMARY: [K1, K2] (the labels, ordered by _label_sort_key)

    cur_dict - the (sub)tree to summarise. Non-leaf dicts are modified in place.
    output_format - one of the summary constants in ConflictOutputFormat.

    Returns the summarised replacement for cur_dict: an int, a list of labels,
    or cur_dict itself with its values summarised. An empty dict is returned
    unchanged.
    """
    if not cur_dict:
        # Nothing to summarise. Without this guard, next() below would raise
        # StopIteration when there are no conflicts at all.
        return cur_dict

    # Only the first value is inspected to decide whether this is a leaf dict.
    first_value = next(iter(cur_dict.values()))
    if first_value is _CONFLICT_PLACEHOLDER:
        if output_format == ConflictOutputFormat.SHORT_SUMMARY:
            return len(cur_dict)
        return sorted(cur_dict, key=_label_sort_key)

    # Reassigning values of existing keys is safe while iterating over items().
    for k, v in cur_dict.items():
        cur_dict[k] = summarise_conflicts(v, output_format)
    return cur_dict


def _label_sort_key(label):
"""Sort labels of conflicts in a sensible way."""
if (
label.startswith("ancestor=")
or label.startswith("ours=")
or label.startswith("theirs=")
):
# Put the complicated conflicts last.
return "Z multiple-path", label

parts = label.split('=', 1)
if len(parts) == 2:
prefix, pk = parts
pk = int(pk) if pk.isdigit() else pk
return "Feature", prefix, pk
else:
# TODO: maybe meta conflicts should go before feature conflicts.
return "Meta", label


def move_repo_to_merging_state(
repo, conflict_index, merge_message, *, ancestor, ours, theirs
):
Expand Down Expand Up @@ -269,21 +476,29 @@ def abort_merging_state(repo):
# Not sure if it matters - we don't modify HEAD when we move into merging state.


def output_json_conflicts_as_text(jdict):
for table, table_conflicts in sorted(jdict.items()):
if table == "<other>":
continue
click.secho(f"{table}:", bold=True)
meta_conflicts = table_conflicts.get("metaConflicts", 0)
if meta_conflicts:
click.echo(f" META conflicts: {meta_conflicts}")
feature_conflicts = table_conflicts.get("featureConflicts", {})
if feature_conflicts:
click.echo(" Feature conflicts:")
for k, v in sorted(feature_conflicts.items()):
click.echo(f" {k}: {v}")
click.echo()

non_table_conflicts = jdict.get("<other>", 0)
if non_table_conflicts:
click.secho(f"Other conflicts: {non_table_conflicts}", bold=True)
# Maps keys used in the JSON conflict output to the headings shown for them
# in text output. Keys not listed here are shown as-is.
_JSON_KEYS_TO_TEXT_HEADERS = {
    "featureConflicts": "Feature conflicts",
    "metaConflicts": "META conflicts",
}


def output_conflicts_as_text(jdict, level=0):
    """
    Writes the JSON output of list_conflicts to stdout as text, using click.echo.

    jdict - the (possibly nested) dict to print. Entries are printed in sorted
        key order. Dict values are printed recursively, list values one item
        per line, and anything else as "heading: value".
    level - nesting depth, which controls the indent. Headings at level 0
        are printed bold and followed by a blank line.
    """
    is_top = level == 0
    pad = " " * level

    for key in sorted(jdict):
        value = jdict[key]
        title = _JSON_KEYS_TO_TEXT_HEADERS.get(key, key)

        if not isinstance(value, (dict, list)):
            # Scalar (eg a count): heading and value share one line.
            click.echo(f"{pad}{title}: {value}")
            continue

        click.secho(f"{pad}{title}:", bold=is_top)
        if isinstance(value, dict):
            output_conflicts_as_text(value, level + 1)
        else:
            for entry in value:
                click.echo(f"{pad} {entry}")
        if is_top:
            click.echo()
Loading

0 comments on commit 5745e45

Please sign in to comment.