Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve understanding of revisions vs filters [#706] #711

Merged
merged 2 commits into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions kart/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from kart.completion_shared import path_completer
from kart.crs_util import CoordinateReferenceString
from kart.output_util import dump_json_output
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState


Expand Down Expand Up @@ -124,7 +124,7 @@ def feature_count_diff(
)
@click.argument(
"args",
metavar="[REVISION RANGE] [--] [FILTERS]",
metavar="[REVISIONS] [--] [FILTERS]",
nargs=-1,
type=click.UNPROCESSED,
shell_complete=path_completer,
Expand All @@ -144,23 +144,23 @@ def diff(
"""
Show changes between two commits, or between a commit and the working copy.

COMMIT_SPEC -
REVISIONS -

- if not supplied, the default is HEAD, to diff between HEAD and the working copy.

- if a single ref is supplied: commit-A - diffs between commit-A and the working copy.
- if a single revision is supplied: commit-A - diffs between commit-A and the working copy.

- if supplied with the form: commit-A...commit-B - diffs between commit-A and commit-B.

- supplying two seperate refs: commit-A commit-B - also diffs between commit-A and commit-B
- supplying two seperate revisions: commit-A commit-B - also diffs between commit-A and commit-B

- if supplied with the form: commit-A..commit-B - diffs between (the common ancestor of
commit-A and commit-B) and (commit-B).

To list only particular changes, supply one or more FILTERS of the form [DATASET[:PRIMARY_KEY]]
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(repo, args)
options, commits, filters = parse_revisions_and_filters(repo, args)
output_type, fmt = parse_output_format(output_format, json_style)

assert len(commits) <= 2
Expand Down
12 changes: 6 additions & 6 deletions kart/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from kart.exceptions import NotYetImplemented, SubprocessError
from kart.key_filters import RepoKeyFilter
from kart.output_util import dump_json_output
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

Expand Down Expand Up @@ -223,7 +223,7 @@ def convert_user_patterns_to_raw_paths(paths, repo, commits):
)
@click.argument(
"args",
metavar="[REVISION RANGE] [--] [FILTERS]",
metavar="[REVISIONS] [--] [FILTERS]",
nargs=-1,
shell_complete=path_completer,
)
Expand All @@ -238,15 +238,15 @@ def log(
):
"""
Show commit logs.
The REVISION RANGE can be a commit, a set of commits, or references to commits. A log containing those commits
The REVISIONS can be a commit, a set of commits, or references to commits. A log containing those commits
and all their ancestors will be output. The log of a particular range of commits can also be requested
using the format <commit1>..<commit2> - for more details, see https://git-scm.com/docs/git-log.
If FEATURES are specified, then only commits where those features were changed will be output. Entire
datasets can be specified by name, or individual features can be specified using the format
If FILTERS are specified, then only commits where the datasets or features specified were changed will be output.
Entire datasets can be specified by name, or individual features can be specified using the format
<dataset-name>:<feature-primary-key>.
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(
options, commits, filters = parse_revisions_and_filters(
repo, args, kwargs, allow_options=True
)

Expand Down
182 changes: 100 additions & 82 deletions kart/parse_args.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from enum import Enum, auto
import re
import warnings

from .cli_util import KartCommand, RemovalInKart012Warning


import click
import pygit2

from kart.cli_util import KartCommand, RemovalInKart012Warning
from kart.exceptions import NotFound


class PreserveDoubleDash(KartCommand):
"""
Expand Down Expand Up @@ -39,29 +39,15 @@ def parse_args(self, ctx, args):
return super(PreserveDoubleDash, self).parse_args(ctx, args)


class ArgType(Enum):
# Which revision(s) to display - a commit, ref, range, etc:
COMMIT = auto()
# How to log it.
OPTION = auto()
# Which item(s) the user is interested in. These must come last.
# In Git you filter by path so these are called paths - but we don't expose the internal path
# of most Kart items, so we we just call these filters.
FILTER = auto()


def get_arg_type(repo, arg, allow_options=True, allow_commits=True, allow_filters=True):
"""Decides if some user-supplied argument is a commit-ish or a filter (or even an option)."""

# We prefer to parse args as commits if at all plausible - if the user meant it to be a filter,
# they always have the possibility to force it to be a filter using "--".
# So we parse "foo...bar" as a commit range without checking if foo and bar exist - it's more likely that the user
# meant that, than that they want to filter to the path "foo...bar" and if it doesn't work, we'll error accordingly.

assert allow_commits or allow_filters

if arg.startswith("-"):
if allow_options:
def _separate_options(args, allow_options):
options = []
others = []
for arg in args:
if not arg.startswith("-"):
others.append(arg)
elif not allow_options:
raise click.UsageError(f"No such option: {arg}")
else:
# It's not explicitly stated by https://git-scm.com/docs/git-check-ref-format
# but this isn't a valid commit-ish.
# $ git branch -c -- -x
Expand All @@ -75,29 +61,89 @@ def get_arg_type(repo, arg, allow_options=True, allow_commits=True, allow_filter
f"This will be removed in Kart 0.12! Please comment on {issue_link} if you need to use this option.",
RemovalInKart012Warning,
)
return ArgType.OPTION
else:
raise click.UsageError(f"No such option: {arg}")
options.append(arg)
return options, others


def _append_kwargs_to_options(options, kwargs, allow_options):
if not kwargs:
return
assert allow_options

for option_name, option_val in kwargs.items():
option_name = option_name.replace("_", "-", 1)
t = type(option_val)
if t == int or t == str:
options.append(f"--{option_name}={option_val}")
elif t == tuple:
options.extend([f"--{option_name}={o}" for o in option_val]),
elif t == bool and option_val:
options.append(f"--{option_name}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just use isinstance

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 good call but I find it amusing how you recommend style changes to your own code

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep. not sure what i was thinking when i wrote it, that code doesn't seem like something I would write normally. Maybe I wrote it before I had a ☕


if allow_commits:
if arg == "[EMPTY]" or ".." in arg:
return ArgType.COMMIT

try:
repo.resolve_refish(arg)
return ArgType.COMMIT
except (KeyError, ValueError, pygit2.InvalidSpecError):
pass
HEAD_PATTERN = re.compile(r"^HEAD\b", re.IGNORECASE)
craigds marked this conversation as resolved.
Show resolved Hide resolved

if allow_filters:
return ArgType.FILTER
HINT = "Use '--' to separate paths from revisions, like this:\n'kart <command> [<revision>...] -- [<filter>...]'"
SILENCING_HINT = "To silence this warning, use '--' to separate paths from revisions, like this:\n'kart <command> [<revision>...] -- [<filter>...]'\n"

raise click.UsageError(
f"Argument not recognised as a valid commit, ref, or range: {arg}"
)

def _is_revision(repo, arg, warning_set):
# These things *could* be a path, but in that case the user should add a `--` before this arg to
# disambiguate, and they haven't done that here.
if arg == "[EMPTY]" or ".." in arg or HEAD_PATTERN.match(arg) or arg.endswith("^?"):
craigds marked this conversation as resolved.
Show resolved Hide resolved
return True

def parse_commits_and_filters(
filter_path = arg.split(":", maxsplit=1)[0]
head_tree = repo.head_tree
is_useful_filter_at_head = head_tree and filter_path in head_tree
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how's filter_path in head_tree work? It looks like filter_path is something like ["<datasetname>", "<pk_value>"] and you're passing it to a pygit2.Tree.__contains__, which doesn't know about our path encodings. Or am I missing something?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only pass the first part of a filter to it - everything before the ":" - which is a path with no particular encoding. Everything after that is ignored. So if you have an arg like "points:feature:25" and a dataset called "points" and no revision called "points", we're happy to assume your arg is a filter - even without encoding the feature PK to see if it exists, it's enough that the dataset exists.

Copy link
Member

@craigds craigds Sep 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, I misread L102


if ":" in arg and not is_useful_filter_at_head:
click.echo(
f"Assuming '{arg}' is a filter argument (that doesn't match anything at HEAD)",
err=True,
)
warning_set.add(SILENCING_HINT)
return False

try:
repo.resolve_refish(arg)
is_revision = True
except (KeyError, ValueError, pygit2.InvalidSpecError):
is_revision = False

if is_revision and not is_useful_filter_at_head:
return True
elif is_useful_filter_at_head and not is_revision:
return False
elif is_revision and is_useful_filter_at_head:
raise click.UsageError(
f"Ambiguous argument '{arg}' - could be either a revision or a filter\n{HINT}"
)
else:
raise NotFound(
f"Ambiguous argument '{arg}' - doesn't appear to be either a revision or a filter\n{HINT}"
)


def _disambiguate_revisions_and_filters(repo, args):
revisions = []
filters = []
warning_set = set()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure this needs to be a set; it looks like you're only adding SILENCING_HINT, so there should be other ways to dedupe that. Making it a set means the output will be nondeterministically ordered if you ever add a second thing to the set.

Perhaps we should configure the warnings module? It will de-dupe warnings automatically if configured correctly

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... the warnings module looks great for deprecating things in modules that will be imported by third parties, etc etc. But I am just trying to print something once: I don't need auto line numbers + module names, or configuration options, and I especially don't want it to remember which warnings it has already printed for any longer than this function call (as could happen with our testing framework / kart helper). It's not beautiful how it is but I've just spent 10 minutes thinking about this and I think it's not really worth spending any longer on it - there's actually nothing measurably wrong with it right now

Copy link
Member

@craigds craigds Sep 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kart helper

yep, that could use some work. Looks like there's no simple way to reset the warning registry globally: https://bugs.python.org/issue21724 - which we'd need to make this work well for kart helper

as could happen with our testing framework

AFAIK pytest resets the warnings state between each test, by default. Maybe it uses the hacky workaround in that python ticket 🤷 . It also has really good support for the warnings module if you want to check for warnings in your tests: https://docs.pytest.org/en/latest/how-to/capture-warnings.html

So in that case, I guess leave this as it is 👍

for i, arg in enumerate(args):
if _is_revision(repo, arg, warning_set):
if filters:
raise click.UsageError(
f"Filter argument '{filters[0]}' should go after revision argument '{arg}'\n{HINT}"
)
revisions.append(arg)
else:
filters.append(arg)
for warning in warning_set:
click.echo(warning, err=True)
return revisions, filters


def parse_revisions_and_filters(
repo,
args,
kwargs=None,
Expand All @@ -108,6 +154,9 @@ def parse_commits_and_filters(
Returns a three-tuple: (options, commits/refs/ranges, filters)
"""

# If kwargs are passed, allow_options must be set.
assert allow_options or not kwargs

# As soon as we encounter a filter, we assume all remaining args are also filters.
# i.e. the filters must be given *last*.
# If it's ambiguous whether something is a filter or not, we assume it's a commit-ish.
Expand All @@ -118,42 +167,11 @@ def parse_commits_and_filters(
dash_index = args.index("--")
filters = list(args[dash_index + 1 :])
args = args[:dash_index]
options, revisions = _separate_options(args, allow_options)
_append_kwargs_to_options(options, kwargs, allow_options)
return options, revisions, filters
else:
dash_index = None
filters = []

options = []
commits = []

lists_by_type = {
ArgType.OPTION: options,
ArgType.COMMIT: commits,
ArgType.FILTER: filters,
}

allow_commits = True
allow_filters = dash_index is None
for arg in args:
arg_type = get_arg_type(
repo,
arg,
allow_options=allow_options,
allow_commits=allow_commits,
allow_filters=allow_filters,
)
lists_by_type[arg_type].append(arg)
if arg_type == ArgType.FILTER:
allow_commits = False

if kwargs is not None and allow_options:
for option_name, option_val in kwargs.items():
option_name = option_name.replace("_", "-", 1)
t = type(option_val)
if t == int or t == str:
options.append(f"--{option_name}={option_val}")
elif t == tuple:
options.extend([f"--{option_name}={o}" for o in option_val]),
elif t == bool and option_val:
options.append(f"--{option_name}")

return options, commits, filters
options, args = _separate_options(args, allow_options)
_append_kwargs_to_options(options, kwargs, allow_options)
revisions, filters = _disambiguate_revisions_and_filters(repo, args)
return options, revisions, filters
8 changes: 5 additions & 3 deletions kart/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from kart.cli_util import KartCommand, OutputFormatType, parse_output_format
from kart.completion_shared import path_completer
from kart.crs_util import CoordinateReferenceString
from kart.parse_args import PreserveDoubleDash, parse_commits_and_filters
from kart.parse_args import PreserveDoubleDash, parse_revisions_and_filters
from kart.repo import KartRepoState


Expand Down Expand Up @@ -88,10 +88,12 @@ def show(
args,
):
"""
Show the given commit, or HEAD
Shows the given REVISION, or HEAD if none in specified.
craigds marked this conversation as resolved.
Show resolved Hide resolved

To list only particular changes, supply one or more FILTERS of the form [DATASET[:PRIMARY_KEY]]
"""
repo = ctx.obj.get_repo(allowed_states=KartRepoState.ALL_STATES)
options, commits, filters = parse_commits_and_filters(repo, args)
options, commits, filters = parse_revisions_and_filters(repo, args)

if len(commits) > 1:
raise click.BadParameter(
Expand Down