From 255852e2dac1b894fc09a48c95c09ca386660030 Mon Sep 17 00:00:00 2001 From: Andrew Olsen Date: Fri, 5 Jun 2020 13:51:10 +1200 Subject: [PATCH] Add support for arbitrary conflict resolutions --- sno/gpkg.py | 13 +++++ sno/merge_util.py | 3 +- sno/resolve.py | 120 +++++++++++++++++++++++++++++------------ tests/test_merge.py | 4 +- tests/test_resolve.py | 121 +++++++++++++++++++++++++++++++++++------- 5 files changed, 205 insertions(+), 56 deletions(-) diff --git a/sno/gpkg.py b/sno/gpkg.py index 9830eabe4..fdd07c109 100644 --- a/sno/gpkg.py +++ b/sno/gpkg.py @@ -1,5 +1,6 @@ import binascii import collections +import json import math import struct @@ -360,6 +361,18 @@ def ogr_to_gpkg_geom( return header + envelope + wkb +def json_ogr_to_gpkg_geom(json_ogr, **kwargs): + """ + Given an OGR geometry object that has been exported to JSON, construct a GPKG geometry value. + See ogr_to_gpkg_geom + """ + if not isinstance(json_ogr, str): + json_ogr = json.dumps(json_ogr) + + ogr_geom = ogr.CreateGeometryFromJson(json_ogr) + return ogr_to_gpkg_geom(ogr_geom, **kwargs) + + def geom_envelope(gpkg_geom): """ Parse GeoPackage geometry to a 2D envelope. diff --git a/sno/merge_util.py b/sno/merge_util.py index 764ebc489..160648d57 100644 --- a/sno/merge_util.py +++ b/sno/merge_util.py @@ -613,7 +613,7 @@ def label(self): if not hasattr(self, "_label"): if self.has_multiple_paths: self._label = " ".join( - f"{v.version_name}={v.path_label}" for v in self.versions if v + f"{v.version_name}={v.path_label}" for v in self.true_versions ) else: self._label = self.any_true_version.path_label @@ -630,6 +630,7 @@ def categorised_label(self): self.has_multiple_paths and len(set(v.table for v in self.true_versions)) > 1 ): + # Conflict involves files in multiple tables. return ["", self.label] table = self.any_true_version.table diff --git a/sno/resolve.py b/sno/resolve.py index 535b4eb9b..cacbde096 100644 --- a/sno/resolve.py +++ b/sno/resolve.py @@ -1,53 +1,97 @@ +import copy +import json +from pathlib import Path + import click +import pygit2 from .cli_util import MutexOption +from .gpkg import json_ogr_to_gpkg_geom from .merge_util import MergeIndex, MergeContext, RichConflict -from .exceptions import InvalidOperation, NotFound, NO_CONFLICT +from .exceptions import InvalidOperation, NotFound, NotYetImplemented, NO_CONFLICT from .repo_files import RepoState +def ungeojson_feature(feature, dataset): + """Given a geojson feature belonging to dataset, returns the feature as a dict containing a gpkg geometry.""" + result = copy.deepcopy(feature['properties']) + if dataset.geom_column_name: + result[dataset.geom_column_name] = json_ogr_to_gpkg_geom(feature['geometry']) + return result + + +def ungeojson_file(file_path, dataset): + """ + Given a file containing multiple geojson features belonging to dataset, + returns the features as dicts containing gpkg geometries. + """ + content = Path(file_path).read_text(encoding="utf-8") + features = json.loads(content)["features"] + return [ungeojson_feature(f, dataset) for f in features] + + +def load_geojson_resolve(file_path, dataset, repo): + """ + Given a file that contains 0 or more geojson features, and a dataset, + returns pygit2.IndexEntrys containing those features when added to that dataset. + """ + + features = ungeojson_file(file_path, dataset) + + entries = [] + for feature in features: + pk = feature[dataset.primary_key] + object_path = "/".join([dataset.path, dataset.get_feature_path(pk)]) + bin_feature = dataset.encode_feature(feature) + blob_id = repo.create_blob(bin_feature) + entries += [pygit2.IndexEntry(object_path, blob_id, pygit2.GIT_FILEMODE_BLOB)] + return entries + + +def ensure_geojson_resolve_supported(rich_conflict): + """ + Ensures that the given conflict can be resolved by a resolution provided in a geojson file. + This is true so long as the conflict only involves a single table + - otherwise we won't know which table should contain the resolution - + and the conflict only involves features, since we can't load metadata from geojson. + """ + categorised_label = rich_conflict.categorised_label + single_table = categorised_label[0] == rich_conflict.any_true_version.table + only_features = categorised_label[1] == "featureConflicts" + if not single_table or not only_features: + raise NotYetImplemented( + "Sorry, only feature conflicts can currently be resolved using --with-file" + ) + + @click.command() @click.pass_context @click.option( - "--ancestor", - "resolve_to_version", - flag_value="ancestor", - help="Resolve the conflict by accepting the 'ancestor' version", - cls=MutexOption, - exclusive_with=["ours", "theirs", "delete"], -) -@click.option( - "--ours", - "resolve_to_version", - flag_value="ours", - help="Resolve the conflict by accepting the 'ours' version", + "--with", + "with_version", + type=click.Choice(["ancestor", "ours", "theirs", "delete"]), + help=( + "Resolve the conflict with any of the existing versions - " + '"ancestor", "ours", or "theirs" - or with "delete" which resolves the conflict by deleting it.' + ), cls=MutexOption, - exclusive_with=["ancestor", "theirs", "delete"], + exclusive_with=["file_path"], ) @click.option( - "--theirs", - "resolve_to_version", - flag_value="theirs", - help="Resolve the conflict by accepting the 'theirs' version", + "--with-file", + "file_path", + type=click.Path(exists=True, dir_okay=False), + help="Resolve the conflict by accepting the version(s) supplied in the given file.", cls=MutexOption, - exclusive_with=["ancestor", "ours", "delete"], + exclusive_with=["with_version"], ) -@click.option( - "--delete", - "resolve_to_version", - flag_value="delete", - help="Resolve the conflict by deleting it", - cls=MutexOption, - exclusive_with=["ancestor", "ours", "theirs"], -) -# TODO - add more options for accepting other, more interesting versions. @click.argument("conflict_label", default=None, required=True) -def resolve(ctx, resolve_to_version, conflict_label): +def resolve(ctx, with_version, file_path, conflict_label): """Resolve a merge conflict. So far only supports resolving to any of the three existing versions.""" repo = ctx.obj.get_repo(allowed_states=[RepoState.MERGING]) - if not resolve_to_version: - raise click.UsageError("Resolve with either --ancestor, --ours or --theirs") + if not (with_version or file_path): + raise click.UsageError("Choose a resolution using --with or --with-file") merge_index = MergeIndex.read_from_repo(repo) merge_context = MergeContext.read_from_repo(repo) @@ -60,13 +104,21 @@ def resolve(ctx, resolve_to_version, conflict_label): f"Conflict at {conflict_label} is already resolved" ) - if resolve_to_version == "delete": + if file_path: + ensure_geojson_resolve_supported(rich_conflict) + # Use any version of the dataset to serialise the feature. + # TODO: This will need more work when schema changes are supported. + dataset = rich_conflict.any_true_version.dataset + res = load_geojson_resolve(file_path, dataset, repo) + + elif with_version == "delete": res = [] else: - res = [getattr(conflict3, resolve_to_version)] + assert with_version in ("ancestor", "ours", "theirs") + res = [getattr(conflict3, with_version)] if res == [None]: click.echo( - f'Version "{resolve_to_version}" does not exist - resolving conflict by deleting.' + f'Version "{with_version}" does not exist - resolving conflict by deleting.' ) res = [] diff --git a/tests/test_merge.py b/tests/test_merge.py index dbab0b3e5..5d81b0d6f 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -296,7 +296,7 @@ def test_merge_state_lock(create_conflicts, cli_runner): assert r.exit_code == SUCCESS r = cli_runner.invoke(["conflicts"]) assert r.exit_code == INVALID_OPERATION - r = cli_runner.invoke(["resolve", "dummy_conflict", "--delete"]) + r = cli_runner.invoke(["resolve", "dummy_conflict", "--with=delete"]) assert r.exit_code == INVALID_OPERATION r = cli_runner.invoke(["merge", "theirs_branch"]) @@ -310,5 +310,5 @@ def test_merge_state_lock(create_conflicts, cli_runner): assert r.exit_code == INVALID_OPERATION r = cli_runner.invoke(["conflicts"]) assert r.exit_code == SUCCESS - r = cli_runner.invoke(["resolve", "dummy_conflict", "--delete"]) + r = cli_runner.invoke(["resolve", "dummy_conflict", "--with=delete"]) assert r.exit_code == NO_CONFLICT # "dummy_conflict" is not a real conflict diff --git a/tests/test_resolve.py b/tests/test_resolve.py index 10d599d9a..364d268f5 100644 --- a/tests/test_resolve.py +++ b/tests/test_resolve.py @@ -4,12 +4,37 @@ from sno.diff_output import json_row from sno.exceptions import INVALID_OPERATION from sno.merge_util import MergeIndex +from sno.repo_files import write_repo_file from sno.structure import RepositoryStructure + H = pytest.helpers.helpers() -def test_resolve_conflicts(create_conflicts, cli_runner): +def get_conflict_ids(cli_runner): + r = cli_runner.invoke(["conflicts", "-s", "--flat", "--json"]) + assert r.exit_code == 0, r + return json.loads(r.stdout)["sno.conflicts/v1"] + + +def delete_remaining_conflicts(cli_runner): + # TODO - do this with a single resolve when this is supported. + conflict_ids = get_conflict_ids(cli_runner) + while conflict_ids: + r = cli_runner.invoke(["resolve", conflict_ids[0], "--with=delete"]) + assert r.exit_code == 0, r + conflict_ids = get_conflict_ids(cli_runner) + + +def get_json_feature(rs, layer, pk): + try: + _, feature = rs[layer].get_feature(pk, ogr_geoms=False) + return json_row(feature, H.POLYGONS.LAYER_PK) + except KeyError: + return None + + +def test_resolve_with_version(create_conflicts, cli_runner): with create_conflicts(H.POLYGONS) as repo: r = cli_runner.invoke(["merge", "--continue"]) assert r.exit_code == INVALID_OPERATION @@ -18,13 +43,8 @@ def test_resolve_conflicts(create_conflicts, cli_runner): assert r.exit_code == 0, r assert json.loads(r.stdout)["sno.merge/v1"]["conflicts"] - def get_conflict_ids(cli_runner): - r = cli_runner.invoke(["conflicts", "-s", "--flat", "--json"]) - assert r.exit_code == 0, r - return json.loads(r.stdout)["sno.conflicts/v1"] - conflict_ids = get_conflict_ids(cli_runner) - resolutions = iter(["--ancestor", "--ours", "--theirs", "--delete"]) + resolutions = iter(["ancestor", "ours", "theirs", "delete"]) # Keep track of which order we resolve the conflicts - each conflict # resolved will have a primary key, and we resolve conflicts in @@ -41,7 +61,9 @@ def get_conflict_ids(cli_runner): pk = conflict_id.split("=", 1)[1] pk_order += [pk] - r = cli_runner.invoke(["resolve", conflict_id, next(resolutions)]) + r = cli_runner.invoke( + ["resolve", conflict_id, f"--with={next(resolutions)}"] + ) assert r.exit_code == 0, r conflict_ids = get_conflict_ids(cli_runner) assert len(conflict_ids) == num_conflicts - 1 @@ -69,17 +91,78 @@ def get_conflict_ids(cli_runner): merged = RepositoryStructure.lookup(repo, "HEAD") ours = RepositoryStructure.lookup(repo, "ours_branch") theirs = RepositoryStructure.lookup(repo, "theirs_branch") + l = H.POLYGONS.LAYER - def feature_to_json(rs, pk): - _, feature = rs[H.POLYGONS.LAYER].get_feature(pk, ogr_geoms=False) - return json_row(feature, H.POLYGONS.LAYER_PK) + pk0, pk1, pk2, pk3 = pk_order + # Feature at pk0 was resolved to ancestor, which was None. + assert get_json_feature(merged, l, pk0) is None + assert get_json_feature(merged, l, pk1) == get_json_feature(ours, l, pk1) + assert get_json_feature(merged, l, pk2) == get_json_feature(theirs, l, pk2) + assert get_json_feature(merged, l, pk3) is None - def assert_no_such_feature(rs, pk): - with pytest.raises(KeyError): - rs[H.POLYGONS.LAYER].get_feature(pk, ogr_geoms=False) - pk0, pk1, pk2, pk3 = pk_order - assert_no_such_feature(merged, pk0) # Resolved to ancestor, which was None. - assert feature_to_json(merged, pk1) == feature_to_json(ours, pk1) - assert feature_to_json(merged, pk2) == feature_to_json(theirs, pk2) - assert_no_such_feature(merged, pk3) +def test_resolve_with_file(create_conflicts, cli_runner): + with create_conflicts(H.POLYGONS) as repo: + r = cli_runner.invoke(["diff", "ancestor_branch..ours_branch", "--geojson"]) + assert r.exit_code == 0, r + ours_geojson = json.loads(r.stdout)["features"][0] + assert ours_geojson["id"] == "I::98001" + + r = cli_runner.invoke(["diff", "ancestor_branch..theirs_branch", "--geojson"]) + assert r.exit_code == 0, r + theirs_geojson = json.loads(r.stdout)["features"][0] + assert theirs_geojson["id"] == "I::98001" + + r = cli_runner.invoke(["merge", "theirs_branch", "--json"]) + assert r.exit_code == 0, r + assert json.loads(r.stdout)["sno.merge/v1"]["conflicts"] + + r = cli_runner.invoke(["conflicts", "-s", "--json"]) + assert r.exit_code == 0, r + + conflicts = json.loads(r.stdout)["sno.conflicts/v1"] + add_add_conflict = conflicts[H.POLYGONS.LAYER]["featureConflicts"]["add/add"][0] + assert add_add_conflict == "nz_waca_adjustments:id=98001" + + # These IDs are irrelevant, but we change them to at least be unique. + ours_geojson["id"] = "ours-feature" + theirs_geojson["id"] = "theirs-feature" + # Changing this ID means the two features no long conflict. + theirs_geojson["properties"]["id"] = 98002 + + resolution = { + 'features': [ours_geojson, theirs_geojson], + 'type': 'FeatureCollection', + } + write_repo_file(repo, "resolution.geojson", json.dumps(resolution)) + r = cli_runner.invoke( + ["resolve", add_add_conflict, "--with-file=resolution.geojson"] + ) + assert r.exit_code == 0, r + + merge_index = MergeIndex.read_from_repo(repo) + assert len(merge_index.entries) == 242 + assert len(merge_index.conflicts) == 4 + assert len(merge_index.resolves) == 1 + + ck = next(iter(merge_index.resolves.keys())) + assert len(merge_index.resolves[ck]) == 2 # Resolved with 2 features + + delete_remaining_conflicts(cli_runner) + + r = cli_runner.invoke(["merge", "--continue"]) + + merged = RepositoryStructure.lookup(repo, "HEAD") + ours = RepositoryStructure.lookup(repo, "ours_branch") + theirs = RepositoryStructure.lookup(repo, "theirs_branch") + l = H.POLYGONS.LAYER + + # Both features are present in the merged repo, ours at 98001 and theirs at 98002. + assert get_json_feature(merged, l, 98001) == get_json_feature(ours, l, 98001) + # Theirs feature is slightly different - it has a new primary key. + assert get_json_feature(merged, l, 98002) != get_json_feature(theirs, l, 98001) + + modified_theirs_json = get_json_feature(theirs, l, 98001) + modified_theirs_json["id"] = 98002 + modified_theirs_json["properties"]["id"] = 98002 + assert get_json_feature(merged, l, 98002) == modified_theirs_json