Skip to content

Commit

Permalink
Merge pull request #998 from koordinates/delta-filters
Browse files Browse the repository at this point in the history
Added delta-filters
  • Loading branch information
olsen232 authored Jun 20, 2024
2 parents 6b9d372 + 614a28b commit cb74bd6
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 228 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Please note that compatibility for 0.x releases (software or repositories) isn't

_When adding new entries to the changelog, please include issue/PR numbers wherever possible._

## 0.15.3 (UNRELEASED)

- Replaces minimal patches with delta-filters - a more general-purpose way of filtering parts (inserts, updates, deletes) of JSON diffs when not all parts are required. [#998](https://github.com/koordinates/kart/pull/998)

## 0.15.2

- Adds a new command `kart export` which enables export of vector or tabular datasets to any OGR format. [#992](https://github.com/koordinates/kart/issues/992)
Expand Down
60 changes: 18 additions & 42 deletions kart/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from kart.exceptions import (
NO_TABLE,
NO_WORKING_COPY,
PATCH_DOES_NOT_APPLY,
InvalidOperation,
NotFound,
NotYetImplemented,
Expand Down Expand Up @@ -135,27 +134,23 @@ def parse(self, f):
class FeatureDeltaParser:
"""Parses JSON for a delta - ie {"-": old-value, "+": new-value} - into a Delta object."""

def __init__(self, old_schema, new_schema, *, allow_minimal_updates=False):
def __init__(self, old_schema, new_schema):
self.old_parser = (
KeyValueParser(old_schema) if old_schema else NullSchemaParser("old")
)
self.new_parser = (
KeyValueParser(new_schema) if new_schema else NullSchemaParser("new")
)
self.allow_minimal_updates = allow_minimal_updates

def parse(self, change):
if "*" in change:
if self.allow_minimal_updates:
return Delta(
None,
self.new_parser.parse(change.get("*")),
)
else:
raise InvalidOperation(
"No 'base' commit specified in patch, can't accept '*' deltas",
exit_code=PATCH_DOES_NOT_APPLY,
)
raise NotYetImplemented(
"Sorry, minimal patches with * values are no longer supported."
)
if "--" in change:
return Delta.delete(self.old_parser.parse(change["--"]))
elif "++" in change:
return Delta.insert(self.new_parser.parse(change["++"]))
else:
return Delta(
self.old_parser.parse(change.get("-")),
Expand Down Expand Up @@ -186,7 +181,7 @@ def _build_signature(patch_metadata, person, repo):
return repo.author_signature(**signature)


def parse_file_diff(file_diff_input, allow_minimal_updates=None):
def parse_file_diff(file_diff_input):
def convert_half_delta(half_delta):
if half_delta is None:
return None
Expand All @@ -201,35 +196,25 @@ def convert_delta(delta):
return Delta(convert_half_delta(delta.old), convert_half_delta(delta.new))

delta_diff = DeltaDiff(
convert_delta(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
)
convert_delta(Delta.from_key_and_plus_minus_dict(k, v))
for (k, v) in file_diff_input.items()
)
return DatasetDiff([(FILES_KEY, delta_diff)])


def parse_meta_diff(meta_diff_input, allow_minimal_updates=False):
def parse_meta_diff(meta_diff_input):
def convert_delta(delta):
if delta.old_key == "schema.json" or delta.new_key == "schema.json":
return Schema.schema_delta_from_raw_delta(delta)
return delta

return DeltaDiff(
convert_delta(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
)
convert_delta(Delta.from_key_and_plus_minus_dict(k, v))
for (k, v) in meta_diff_input.items()
)


def parse_feature_diff(
feature_diff_input, dataset, meta_diff, allow_minimal_updates=False
):
def parse_feature_diff(feature_diff_input, dataset, meta_diff):
old_schema = new_schema = None
if dataset is not None:
old_schema = new_schema = dataset.schema
Expand All @@ -240,11 +225,7 @@ def parse_feature_diff(
if schema_delta and schema_delta.new_value:
new_schema = schema_delta.new_value

delta_parser = FeatureDeltaParser(
old_schema,
new_schema,
allow_minimal_updates=allow_minimal_updates,
)
delta_parser = FeatureDeltaParser(old_schema, new_schema)
return DeltaDiff((delta_parser.parse(change) for change in feature_diff_input))


Expand Down Expand Up @@ -307,8 +288,6 @@ def apply_patch(
if do_commit:
check_git_user(repo)

allow_minimal_updates = bool(resolve_missing_values_from_rs)

rs = repo.structure(ref)
# TODO: this code shouldn't special-case tabular working copies
# Specifically, we need to check if those part(s) of the WC exists which the patch applies to.
Expand All @@ -322,7 +301,7 @@ def apply_patch(
repo_diff = RepoDiff()
for ds_path, ds_diff_input in diff_input.items():
if ds_path == FILES_KEY:
repo_diff[FILES_KEY] = parse_file_diff(ds_diff_input, allow_minimal_updates)
repo_diff[FILES_KEY] = parse_file_diff(ds_diff_input)
continue

dataset = rs.datasets().get(ds_path)
Expand All @@ -334,16 +313,14 @@ def apply_patch(
meta_diff_input = ds_diff_input.get("meta", {})

if meta_diff_input:
meta_diff = parse_meta_diff(meta_diff_input, allow_minimal_updates)
meta_diff = parse_meta_diff(meta_diff_input)
repo_diff.recursive_set([ds_path, "meta"], meta_diff)
else:
meta_diff = None

feature_diff_input = ds_diff_input.get("feature", [])
if feature_diff_input:
feature_diff = parse_feature_diff(
feature_diff_input, dataset, meta_diff, allow_minimal_updates
)
feature_diff = parse_feature_diff(feature_diff_input, dataset, meta_diff)
repo_diff.recursive_set([ds_path, "feature"], feature_diff)

if do_commit:
Expand All @@ -364,8 +341,7 @@ def apply_patch(
new_wc_target = None
else:
new_wc_target = rs.create_tree_from_diff(
repo_diff,
resolve_missing_values_from_rs=resolve_missing_values_from_rs,
repo_diff, resolve_missing_values_from_rs=resolve_missing_values_from_rs
)

if new_wc_target:
Expand Down
1 change: 1 addition & 0 deletions kart/base_diff_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
output_path="-",
*,
json_style="pretty",
delta_filter=None,
target_crs=None,
# used by json-lines diffs only
diff_estimate_accuracy=None,
Expand Down
28 changes: 28 additions & 0 deletions kart/cli_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,34 @@ def shell_complete(self, ctx=None, param=None, incomplete=""):
]


class DeltaFilterType(click.ParamType):
"""
Filters parts of individual deltas - new or old values for inserts, updates, or deletes.
"--" is the key for old values of deletes
"-" is the key for old values of updates
"+" is the key for new values of updates
"++" is they key for new values of inserts
"""

name = "string"

ALLOWED_KEYS = ("--", "-", "+", "++")

def convert(self, value, param, ctx):
from kart.key_filters import DeltaFilter

if value is None:
return None
if value.lower() == "all":
return DeltaFilter.MATCH_ALL
pieces = value.split(",")
if any(p for p in pieces if p not in self.ALLOWED_KEYS):
self.fail(
"Delta filter only accepts any subset of the following comma separated keys: --,-,+,++"
)
return DeltaFilter(pieces)


def find_param(ctx_or_params, name):
"""Given the click context / command / list of params - find the param with the given name."""
ctx = ctx_or_params
Expand Down
16 changes: 15 additions & 1 deletion kart/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import click

from kart import diff_estimation
from kart.cli_util import OutputFormatType
from kart.cli_util import OutputFormatType, DeltaFilterType
from kart.completion_shared import ref_or_repo_path_completer
from kart.crs_util import CoordinateReferenceString
from kart.output_util import dump_json_output
Expand Down Expand Up @@ -129,6 +129,18 @@ def feature_count_diff(
is_flag=True,
help="Show changes to file contents (instead of just showing the object IDs of changed files)",
)
@click.option(
"--delta-filter",
type=DeltaFilterType(),
help="Filter out particular parts of each delta - for example, --delta-filter=+ only shows new values of updates. "
"Setting this option modifies Kart's behaviour when outputting JSON diffs - "
"instead using minus to mean old value and plus to mean new value, it uses a more specific scheme: "
"-- (minus-minus) means deleted value, ++ (plus-plus) means inserted value, "
"and - and + still mean old and new value but are only used for updates (not for inserts and deletes). "
"These keys are used when outputting the diffs, and these keys can be whitelisted using this flag to minimise the "
"size of the diff if some types of values are not required. "
"As a final example, --delta-filter=all is equivalent to --delta-filter=--,-,+,++",
)
@click.option(
"--html-template",
default=None,
Expand All @@ -153,6 +165,7 @@ def diff(
add_feature_count_estimate,
convert_to_dataset_format,
diff_files,
delta_filter,
html_template,
args,
):
Expand Down Expand Up @@ -210,6 +223,7 @@ def diff(
filters,
output_path,
json_style=fmt,
delta_filter=delta_filter,
target_crs=crs,
diff_estimate_accuracy=add_feature_count_estimate,
html_template=html_template,
Expand Down
56 changes: 39 additions & 17 deletions kart/diff_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
from dataclasses import dataclass
from typing import Any

from kart.diff_format import DiffFormat

from .exceptions import InvalidOperation


Expand Down Expand Up @@ -104,17 +102,11 @@ def delete(old):
return Delta(old, None)

@staticmethod
def from_key_and_plus_minus_dict(key, d, allow_minimal_updates=False):
if "*" in d:
if allow_minimal_updates:
return Delta(
None,
(key, d["*"]),
)
else:
raise InvalidOperation(
"No 'base' commit specified in patch, can't accept '*' deltas"
)
def from_key_and_plus_minus_dict(key, d):
if "--" in d:
return Delta.delete((key, d["++"]))
elif "++" in d:
return Delta.insert((key, d["++"]))
else:
return Delta(
(key, d["-"]) if "-" in d else None,
Expand Down Expand Up @@ -193,10 +185,17 @@ def __add__(self, other):
result.flags = self.flags | other.flags
return result

def to_plus_minus_dict(self, minimal=False):
# NOTE - it might make more sense to have:
# insert ++ delete -- update - + minimal-update +
# but this is a breaking change, would need to be co-ordinated.
def to_plus_minus_dict(self, delta_filter=None):
from .key_filters import DeltaFilter

if delta_filter is None:
return self.to_plus_minus_dict__simple()
else:
return self.to_plus_minus_dict__advanced(delta_filter)

def to_plus_minus_dict__simple(self, minimal=False):
# Simplest behaviour - minus means old value, plus means new value.
# Not configurable.
if minimal and self.old and self.new:
return {"*": self.new_value}
result = {}
Expand All @@ -206,6 +205,29 @@ def to_plus_minus_dict(self, minimal=False):
result["+"] = self.new_value
return result

def to_plus_minus_dict__advanced(self, delta_filter=None):
# New, more complicated but more useful / configurable behaviour.
# Uses different keys for inserts / updates / deletes.
# Currently only used when --delta-filter is requested.
# "--" means delete's old value
# "-" means update's old value
# "+" means update's new value.
# "++" means insert's new value.

if delta_filter is None:
from .key_filters import DeltaFilter

delta_filter = DeltaFilter.MATCH_ALL
result = {}
if self.old and self.new:
result["-"] = self.old_value if "-" in delta_filter else None
result["+"] = self.new_value if "+" in delta_filter else None
elif self.old:
result["--"] = self.old_value if "--" in delta_filter else None
elif self.new:
result["++"] = self.new_value if "++" in delta_filter else None
return result


class RichDict(UserDict):
"""
Expand Down
Loading

0 comments on commit cb74bd6

Please sign in to comment.