Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve understanding of revisions vs filters [#706] #711

Merged
merged 2 commits into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions kart/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from kart.completion_shared import path_completer
from kart.crs_util import CoordinateReferenceString
from kart.output_util import dump_json_output
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState


Expand Down Expand Up @@ -124,7 +124,7 @@ def feature_count_diff(
)
@click.argument(
"args",
metavar="[REVISION RANGE] [--] [FILTERS]",
metavar="[REVISIONS] [--] [FILTERS]",
nargs=-1,
type=click.UNPROCESSED,
shell_complete=path_completer,
Expand All @@ -144,23 +144,23 @@ def diff(
"""
Show changes between two commits, or between a commit and the working copy.

COMMIT_SPEC -
REVISIONS -

- if not supplied, the default is HEAD, to diff between HEAD and the working copy.

- if a single ref is supplied: commit-A - diffs between commit-A and the working copy.
- if a single revision is supplied: commit-A - diffs between commit-A and the working copy.

- if supplied with the form: commit-A...commit-B - diffs between commit-A and commit-B.

- supplying two seperate refs: commit-A commit-B - also diffs between commit-A and commit-B
- supplying two seperate revisions: commit-A commit-B - also diffs between commit-A and commit-B

- if supplied with the form: commit-A..commit-B - diffs between (the common ancestor of
commit-A and commit-B) and (commit-B).

To list only particular changes, supply one or more FILTERS of the form [DATASET[:PRIMARY_KEY]]
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(repo, args)
options, commits, filters = parse_revisions_and_filters(repo, args)
output_type, fmt = parse_output_format(output_format, json_style)

assert len(commits) <= 2
Expand Down
12 changes: 6 additions & 6 deletions kart/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from kart.exceptions import NotYetImplemented, SubprocessError
from kart.key_filters import RepoKeyFilter
from kart.output_util import dump_json_output
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

Expand Down Expand Up @@ -223,7 +223,7 @@ def convert_user_patterns_to_raw_paths(paths, repo, commits):
)
@click.argument(
"args",
metavar="[REVISION RANGE] [--] [FILTERS]",
metavar="[REVISIONS] [--] [FILTERS]",
nargs=-1,
shell_complete=path_completer,
)
Expand All @@ -238,15 +238,15 @@ def log(
):
"""
Show commit logs.
The REVISION RANGE can be a commit, a set of commits, or references to commits. A log containing those commits
The REVISIONS can be a commit, a set of commits, or references to commits. A log containing those commits
and all their ancestors will be output. The log of a particular range of commits can also be requested
using the format <commit1>..<commit2> - for more details, see https://git-scm.com/docs/git-log.
If FEATURES are specified, then only commits where those features were changed will be output. Entire
datasets can be specified by name, or individual features can be specified using the format
If FILTERS are specified, then only commits where the datasets or features specified were changed will be output.
Entire datasets can be specified by name, or individual features can be specified using the format
<dataset-name>:<feature-primary-key>.
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(
options, commits, filters = parse_revisions_and_filters(
repo, args, kwargs, allow_options=True
)

Expand Down
189 changes: 107 additions & 82 deletions kart/parse_args.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from enum import Enum, auto
import re
import warnings

from .cli_util import KartCommand, RemovalInKart012Warning


import click
import pygit2

from kart.cli_util import KartCommand, RemovalInKart012Warning
from kart.exceptions import NotFound


class PreserveDoubleDash(KartCommand):
"""
Expand Down Expand Up @@ -39,29 +39,15 @@ def parse_args(self, ctx, args):
return super(PreserveDoubleDash, self).parse_args(ctx, args)


class ArgType(Enum):
# Which revision(s) to display - a commit, ref, range, etc:
COMMIT = auto()
# How to log it.
OPTION = auto()
# Which item(s) the user is interested in. These must come last.
# In Git you filter by path so these are called paths - but we don't expose the internal path
# of most Kart items, so we we just call these filters.
FILTER = auto()


def get_arg_type(repo, arg, allow_options=True, allow_commits=True, allow_filters=True):
"""Decides if some user-supplied argument is a commit-ish or a filter (or even an option)."""

# We prefer to parse args as commits if at all plausible - if the user meant it to be a filter,
# they always have the possibility to force it to be a filter using "--".
# So we parse "foo...bar" as a commit range without checking if foo and bar exist - it's more likely that the user
# meant that, than that they want to filter to the path "foo...bar" and if it doesn't work, we'll error accordingly.

assert allow_commits or allow_filters

if arg.startswith("-"):
if allow_options:
def _separate_options(args, allow_options):
options = []
others = []
for arg in args:
if not arg.startswith("-"):
others.append(arg)
elif not allow_options:
raise click.UsageError(f"No such option: {arg}")
else:
# It's not explicitly stated by https://git-scm.com/docs/git-check-ref-format
# but this isn't a valid commit-ish.
# $ git branch -c -- -x
Expand All @@ -75,29 +61,96 @@ def get_arg_type(repo, arg, allow_options=True, allow_commits=True, allow_filter
f"This will be removed in Kart 0.12! Please comment on {issue_link} if you need to use this option.",
RemovalInKart012Warning,
)
return ArgType.OPTION
else:
raise click.UsageError(f"No such option: {arg}")
options.append(arg)
return options, others


if allow_commits:
if arg == "[EMPTY]" or ".." in arg:
return ArgType.COMMIT
def _append_kwargs_to_options(options, kwargs, allow_options):
if not kwargs:
return
assert allow_options

try:
repo.resolve_refish(arg)
return ArgType.COMMIT
except (KeyError, ValueError, pygit2.InvalidSpecError):
pass
for option_name, option_val in kwargs.items():
option_name = option_name.replace("_", "-", 1)
if isinstance(option_val, bool):
if option_val:
options.append(f"--{option_name}")
elif isinstance(option_val, (int, str)):
options.append(f"--{option_name}={option_val}")
elif isinstance(option_val, tuple):
options.extend([f"--{option_name}={o}" for o in option_val])


HEAD_PATTERN = re.compile(r"^HEAD\b")
RANGE_PATTERN = re.compile(r"[^/]\.{2,}[^/]")

HINT = "Use '--' to separate paths from revisions, like this:\n'kart <command> [<revision>...] -- [<filter>...]'"
SILENCING_HINT = "To silence this warning, use '--' to separate paths from revisions, like this:\n'kart <command> [<revision>...] -- [<filter>...]'\n"


def _is_revision(repo, arg, dedupe_warnings):
# These things *could* be a path, but in that case the user should add a `--` before this arg to
# disambiguate, and they haven't done that here.
if (
arg == "[EMPTY]"
or arg.endswith("^?")
or RANGE_PATTERN.search(arg)
or HEAD_PATTERN.search(arg)
):
return True

filter_path = arg.split(":", maxsplit=1)[0]
head_tree = repo.head_tree
is_useful_filter_at_head = head_tree and filter_path in head_tree
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how's filter_path in head_tree work? It looks like filter_path is something like ["<datasetname>", "<pk_value>"] and you're passing it to a pygit2.Tree.__contains__, which doesn't know about our path encodings. Or am I missing something?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only pass the first part of a filter to it - everything before the ":" - which is a path with no particular encoding. Everything after that is ignored. So if you have an arg like "points:feature:25" and a dataset called "points" and no revision called "points", we're happy to assume your arg is a filter - even without encoding the feature PK to see if it exists, it's enough that the dataset exists.

Copy link
Member

@craigds craigds Sep 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, I misread L102


if ":" in arg:
if not is_useful_filter_at_head:
click.echo(
f"Assuming '{arg}' is a filter argument (that doesn't match anything at HEAD)",
err=True,
)
dedupe_warnings.add(SILENCING_HINT)
return False

try:
repo.resolve_refish(arg)
is_revision = True
except (KeyError, ValueError, pygit2.InvalidSpecError):
is_revision = False

if is_revision and not is_useful_filter_at_head:
return True
elif is_useful_filter_at_head and not is_revision:
return False
elif is_revision and is_useful_filter_at_head:
raise click.UsageError(
f"Ambiguous argument '{arg}' - could be either a revision or a filter\n{HINT}"
)
else:
raise NotFound(
f"Ambiguous argument '{arg}' - doesn't appear to be either a revision or a filter\n{HINT}"
)

if allow_filters:
return ArgType.FILTER

raise click.UsageError(
f"Argument not recognised as a valid commit, ref, or range: {arg}"
)
def _disambiguate_revisions_and_filters(repo, args):
revisions = []
filters = []
dedupe_warnings = set()
for i, arg in enumerate(args):
if _is_revision(repo, arg, dedupe_warnings):
if filters:
raise click.UsageError(
f"Filter argument '{filters[0]}' should go after revision argument '{arg}'\n{HINT}"
)
revisions.append(arg)
else:
filters.append(arg)
for warning in dedupe_warnings:
click.echo(warning, err=True)
return revisions, filters


def parse_commits_and_filters(
def parse_revisions_and_filters(
repo,
args,
kwargs=None,
Expand All @@ -108,6 +161,9 @@ def parse_commits_and_filters(
Returns a three-tuple: (options, commits/refs/ranges, filters)
"""

# If kwargs are passed, allow_options must be set.
assert allow_options or not kwargs

# As soon as we encounter a filter, we assume all remaining args are also filters.
# i.e. the filters must be given *last*.
# If it's ambiguous whether something is a filter or not, we assume it's a commit-ish.
Expand All @@ -118,42 +174,11 @@ def parse_commits_and_filters(
dash_index = args.index("--")
filters = list(args[dash_index + 1 :])
args = args[:dash_index]
options, revisions = _separate_options(args, allow_options)
_append_kwargs_to_options(options, kwargs, allow_options)
return options, revisions, filters
else:
dash_index = None
filters = []

options = []
commits = []

lists_by_type = {
ArgType.OPTION: options,
ArgType.COMMIT: commits,
ArgType.FILTER: filters,
}

allow_commits = True
allow_filters = dash_index is None
for arg in args:
arg_type = get_arg_type(
repo,
arg,
allow_options=allow_options,
allow_commits=allow_commits,
allow_filters=allow_filters,
)
lists_by_type[arg_type].append(arg)
if arg_type == ArgType.FILTER:
allow_commits = False

if kwargs is not None and allow_options:
for option_name, option_val in kwargs.items():
option_name = option_name.replace("_", "-", 1)
t = type(option_val)
if t == int or t == str:
options.append(f"--{option_name}={option_val}")
elif t == tuple:
options.extend([f"--{option_name}={o}" for o in option_val]),
elif t == bool and option_val:
options.append(f"--{option_name}")

return options, commits, filters
options, args = _separate_options(args, allow_options)
_append_kwargs_to_options(options, kwargs, allow_options)
revisions, filters = _disambiguate_revisions_and_filters(repo, args)
return options, revisions, filters
8 changes: 5 additions & 3 deletions kart/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from kart.cli_util import KartCommand, OutputFormatType, parse_output_format
from kart.completion_shared import path_completer
from kart.crs_util import CoordinateReferenceString
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState


Expand Down Expand Up @@ -88,10 +88,12 @@ def show(
args,
):
"""
Show the given commit, or HEAD
Shows the given REVISION, or HEAD if none is specified.

To list only particular changes, supply one or more FILTERS of the form [DATASET[:PRIMARY_KEY]]
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(repo, args)
options, commits, filters = parse_revisions_and_filters(repo, args)

if len(commits) > 1:
raise click.BadParameter(
Expand Down