List conflicts - label + classify conflicts

koordinates · May 17, 2020 · 5745e45 · 5745e45
1 parent 1fba96d
commit 5745e45
Show file tree

Hide file tree

Showing 4 changed files with 366 additions and 80 deletions.
diff --git a/sno/conflicts.py b/sno/conflicts.py
@@ -6,7 +6,7 @@
 import click
 import pygit2
 
-from .exceptions import InvalidOperation
+from .exceptions import InvalidOperation, NotYetImplemented
 from .repo_files import (
     ORIG_HEAD,
     MERGE_HEAD,
@@ -20,6 +20,7 @@
     repo_file_exists,
 )
 from .structs import AncestorOursTheirs
+from .structure import RepositoryStructure
 
 
 L = logging.getLogger("sno.conflicts")
@@ -171,58 +172,264 @@ def _ensure_conflict(self, conflict):
             )
 
 
-def summarise_conflicts_json(repo, conflict_index):
-    # Shortcut used often below
-    def aot(generator_or_tuple):
-        return AncestorOursTheirs(*generator_or_tuple)
+class ConflictOutputFormat:
+    """Different ways of showing all the conflicts resulting from a merge."""
 
+    # Only SHORT_SUMMARY is used so far, by `sno merge`.
+
+    # Summaries:
+    SHORT_SUMMARY = 0  # Show counts of types of conflicts.
+    SUMMARY = 1  # List all the features that conflicted.
+
+    # Full diffs: Show all versions of all the features that conflicted, in...
+    FULL_TEXT_DIFF = 2  # ... text format.
+    FULL_JSON_DIFF = 3  # ... JSON format.
+    FULL_GEOJSON_DIFF = 4  # ... GEOJSON format.
+
+    _SUMMARY_FORMATS = (SHORT_SUMMARY, SUMMARY)
+
+
+def list_conflicts(
+    repo, conflict_index, output_format, *, ancestor, ours, theirs, flat=False
+):
+    """
+        Lists all the conflicts in conflict_index, categorised into nested dicts.
+        Example:
+        {
+            "table_A": {
+                "featureConflicts": {
+                    "edit/edit": {
+                        "table_A:fid=5": {"ancestor": "...", "ours": ..., "theirs": ...},
+                        "table_A:fid=11": {"ancestor": "...", "ours": ..., "theirs": ...},
+                    },
+                    "add/add": {...}
+                },
+                "metaConflicts": {
+                    "edit/edit": {
+                        "table_A:meta:gpkg_spatial_ref_sys": {"ancestor": ..., "ours": ..., "theirs": ...}
+                    }
+                }
+            },
+            "table_B": {...}
+        }
+        Depending on the output_format, the conflicts themselves could be summarised as counts
+        or as lists of names, eg ["table_1:fid=5", "table_1:fid=11"]
+
+        repo - the pygit2.Repository.
+        conflict_index - the ConflictIndex containing the conflicts found.
+        output_format - one of the constants in ConflictOutputFormat.
+        ancestor, ours, theirs - CommitWithReference objects.
+        flat - if True, don't categorise conflicts. Put them all at the top level.
+    """
+    if output_format not in ConflictOutputFormat._SUMMARY_FORMATS:
+        raise NotYetImplemented(
+            "Sorry, Only SUMMARY and SHORT_SUMMARY are supported at present"
+        )
+
+    repo_structure3 = AncestorOursTheirs(
+        RepositoryStructure(repo, ancestor.commit),
+        RepositoryStructure(repo, ours.commit),
+        RepositoryStructure(repo, theirs.commit),
+    )
     conflicts = {}
 
     for key, conflict3 in conflict_index.conflicts.items():
-        paths3 = aot(c.path if c else None for c in conflict3)
-        if not any(paths3):
-            # Shouldn't happen
-            raise RuntimeError("Conflict has no paths")
-
-        # Paths look like this: # path/to/table/.sno-table/feature_path for features
-        # Or path/to/table/.sno-table/meta/... for metadata
-        # (This will need updating if newer dataset versions don't follow this pattern.)
-        tables3 = aot(p.split("/.sno-table/", 1)[0] if p else None for p in paths3)
-        actual_tables = [t for t in tables3 if t]
-        all_same_table = all(a == actual_tables[0] for a in actual_tables)
-        if not all_same_table:
-            # This is a really bad conflict - it seems to involve multiple tables.
-            # Perhaps features were moved from one table to another, or perhaps
-            # a table was renamed.
-            conflicts.setdefault("<other>", 0)
-            conflicts["<other>"] += 1
-            continue
-
-        table = actual_tables[0]
-        conflicts.setdefault(table, {})
-        conflicts_table = conflicts[table]
-
-        meta_change = any("/.sno-table/meta/" in (p or "") for p in paths3)
-        if meta_change:
-            conflicts_table.setdefault("metaConflicts", 0)
-            conflicts_table["metaConflicts"] += 1
-            continue
-
-        conflicts_table.setdefault("featureConflicts", {})
-        feature_conflicts = conflicts_table["featureConflicts"]
-
-        all_same_path = all((p == paths3[0] for p in paths3))
-        if all_same_path:
-            feature_conflicts.setdefault("edit/edit", 0)
-            feature_conflicts["edit/edit"] += 1
-            continue
-
-        feature_conflicts.setdefault("other", 0)
-        feature_conflicts["other"] += 1
+        decoded_path3 = decode_conflict_paths(conflict3, repo_structure3)
+        conflict_dict = get_conflict_as_dict(
+            conflict3, repo_structure3, decoded_path3, output_format
+        )
+        if flat:
+            conflicts.update(conflict_dict)
+        else:
+            conflict_category = get_conflict_category(decoded_path3)
+            add_conflict_dict_to_category(conflicts, conflict_category, conflict_dict)
+
+    if output_format in ConflictOutputFormat._SUMMARY_FORMATS:
+        conflicts = summarise_conflicts(conflicts, output_format)
 
     return conflicts
 
 
+def decode_conflict_paths(conflict3, repo_structure3):
+    """
+    Given 3 versions of an IndexEntry, and 3 versions of a repository_structure,
+    return 3 versions of a decoded path - see RepositoryStructure.decode_path.
+    """
+    return AncestorOursTheirs(
+        *(
+            rs.decode_path(c.path) if c else None
+            for c, rs, in zip(conflict3, repo_structure3)
+        )
+    )
+
+
+def get_conflict_category(decoded_path3):
+    """
+    Given 3 versions of the decoded path, tries to categorise the conflict,
+    so that similar conflicts can be grouped together.
+    For example, a returned category might be:
+    ["table_A", "featureConflicts", "edit/edit"]
+    Meaning conflicting edits were made to a feature in table_A.
+    """
+    dpath3 = decoded_path3
+    actual_dpaths = [p for p in dpath3 if p]
+    actual_tables = [p[0] for p in actual_dpaths]
+    all_same_table = len(set(actual_tables)) == 1
+
+    if not all_same_table:
+        return ["<uncategorised>"]
+    table = actual_tables[0]
+
+    actual_tableparts = [p[1] for p in actual_dpaths]
+    all_same_tablepart = len(set(actual_tableparts)) == 1
+    if all_same_tablepart:
+        tablepart = actual_tableparts[0] + "Conflicts"
+    else:
+        # Meta/feature conflict. Shouldn't really happen.
+        return [table, "<uncategorised>"]
+
+    # <uncategorised> type currently includes everything involving renames.
+    conflict_type = "<uncategorised>"
+    all_same_path = len(set(actual_dpaths)) == 1
+    if all_same_path:
+        if not dpath3.ancestor:
+            if dpath3.ours and dpath3.theirs:
+                conflict_type = "add/add"
+        else:
+            if dpath3.ours and dpath3.theirs:
+                conflict_type = "edit/edit"
+            elif dpath3.ours or dpath3.theirs:
+                conflict_type = "edit/delete"
+
+    return [table, tablepart, conflict_type]
+
+
+# Stand in for a conflict if the conflict is going to be summarised anyway -
+# this helps code re-use between summary and full-diff output modes.
+_CONFLICT_PLACEHOLDER = object()
+
+
+def get_conflict_as_dict(conflict3, repo_structure3, decoded_path3, output_format):
+    """
+    Given 3 versions of an IndexEntry, 3 versions of the repository_structure,
+    and 3 versions of the decoded_path, returns a representation of the conflict
+    as a dict according to the output format. The outermost dict only contains
+    a single key, which is a unique name for the conflict.
+    For example:
+    {"table_A:fid=5": {"ancestor": ..., "ours": ..., "theirs": ...}}
+    """
+
+    label = get_conflict_label(decoded_path3)
+    if output_format in ConflictOutputFormat._SUMMARY_FORMATS:
+        # No need to output info about conflict itself - it will be summarised -
+        # so we just return a placeholder.
+        return {label: _CONFLICT_PLACEHOLDER}
+    else:
+        # TODO - return {label: {"ancestor": ..., "ours": ..., "theirs": ...}}
+        raise NotYetImplemented("Output of full conflict diffs is not supported")
+
+
+def get_conflict_label(decoded_path3):
+    """
+    Given 3 versions of the decoded path, returns a unique name for a conflict.
+    In simple cases, this will be something like: "table_A:fid=5"
+    But if renames have occurred, it could have multiple names, eg:
+    "ancestor=table_A:fid=5 ours=table_A:fid=6 theirs=table_A:fid=12"
+    """
+    dpath3 = decoded_path3
+    actual_dpaths = [p for p in dpath3 if p]
+    all_same_path = len(set(actual_dpaths)) == 1
+
+    if all_same_path:
+        return decoded_path_to_label(actual_dpaths[0])
+
+    label3 = AncestorOursTheirs(
+        *(
+            f"{v}={decoded_path_to_label(p)}" if p else None
+            for v, p, in zip(AncestorOursTheirs.NAMES, decoded_path3)
+        )
+    )
+    return " ".join([l for l in label3 if l])
+
+
+def decoded_path_to_label(decoded_path):
+    """
+    Converts a decoded path to a unique name, eg:
+    ("table_A", "feature", "fid", 5) -> "table_A:fid=5"
+    """
+    if decoded_path is None:
+        return None
+    if decoded_path[1] == "feature":
+        table, tablepart, pk_field, pk = decoded_path
+        return f"{table}:{pk_field}={pk}"
+    else:
+        return ":".join(decoded_path)
+
+
+def add_conflict_dict_to_category(root_dict, conflict_category, conflict_dict):
+    """
+    Ensures the given category of conflicts exists, and then adds
+    the given conflict dict to it.
+    """
+    cur_dict = root_dict
+    for c in conflict_category:
+        cur_dict.setdefault(c, {})
+        cur_dict = cur_dict[c]
+
+    cur_dict.update(conflict_dict)
+
+
+def summarise_conflicts(cur_dict, output_format):
+    """
+    Recursively traverses the tree of categorised conflicts,
+    looking for a dict where the values are placeholders.
+    For example:
+    {
+        K1: _CONFLICT_PLACEHOLDER,
+        K2: _CONFLICT_PLACEHOLDER,
+    }
+    When found, it will be replaced with one of the following:
+    1) SHORT_SUMMARY: 2 (the size of the dict)
+    2) SUMMARY: [K1, K2]
+    """
+    first_value = next(iter(cur_dict.values()))
+    if first_value == _CONFLICT_PLACEHOLDER:
+        if output_format == ConflictOutputFormat.SHORT_SUMMARY:
+            return len(cur_dict)
+        else:
+
+            def sortkey(item):
+                tableybit, pk = item.split('=')
+                pk = int(pk) if pk.isdigit() else pk
+                return tableybit, pk
+
+            return sorted([k for k in cur_dict.keys()], key=_label_sort_key)
+
+    for k, v in cur_dict.items():
+        cur_dict[k] = summarise_conflicts(v, output_format)
+    return cur_dict
+
+
+def _label_sort_key(label):
+    """Sort labels of conflicts in a sensible way."""
+    if (
+        label.startswith("ancestor=")
+        or label.startswith("ours=")
+        or label.startswith("theirs=")
+    ):
+        # Put the complicated conflicts last.
+        return "Z multiple-path", label
+
+    parts = label.split('=', 1)
+    if len(parts) == 2:
+        prefix, pk = parts
+        pk = int(pk) if pk.isdigit() else pk
+        return "Feature", prefix, pk
+    else:
+        # TODO: maybe meta conflicts should go before feature conflicts.
+        return "Meta", label
+
+
 def move_repo_to_merging_state(
     repo, conflict_index, merge_message, *, ancestor, ours, theirs
 ):
@@ -269,21 +476,29 @@ def abort_merging_state(repo):
     # Not sure if it matters - we don't modify HEAD when we move into merging state.
 
 
-def output_json_conflicts_as_text(jdict):
-    for table, table_conflicts in sorted(jdict.items()):
-        if table == "<other>":
-            continue
-        click.secho(f"{table}:", bold=True)
-        meta_conflicts = table_conflicts.get("metaConflicts", 0)
-        if meta_conflicts:
-            click.echo(f"  META conflicts: {meta_conflicts}")
-        feature_conflicts = table_conflicts.get("featureConflicts", {})
-        if feature_conflicts:
-            click.echo("  Feature conflicts:")
-            for k, v in sorted(feature_conflicts.items()):
-                click.echo(f"    {k}: {v}")
-        click.echo()
-
-    non_table_conflicts = jdict.get("<other>", 0)
-    if non_table_conflicts:
-        click.secho(f"Other conflicts: {non_table_conflicts}", bold=True)
+_JSON_KEYS_TO_TEXT_HEADERS = {
+    "featureConflicts": "Feature conflicts",
+    "metaConflicts": "META conflicts",
+}
+
+
+def output_conflicts_as_text(jdict, level=0):
+    """Writes the JSON output of list_conflicts to stdout as text, using click.echo."""
+    top_level = level == 0
+    indent = "  " * level
+
+    for k, v in sorted(jdict.items()):
+        heading = _JSON_KEYS_TO_TEXT_HEADERS.get(k, k)
+        if isinstance(v, dict):
+            click.secho(f"{indent}{heading}:", bold=top_level)
+            output_conflicts_as_text(v, level + 1)
+            if top_level:
+                click.echo()
+        elif isinstance(v, list):
+            click.secho(f"{indent}{heading}:", bold=top_level)
+            for item in v:
+                click.echo(f"{indent}  {item}")
+            if top_level:
+                click.echo()
+        else:
+            click.echo(f"{indent}{heading}: {v}")