Improve existing --no-checkout flag
kart import supports --no-checkout, but it merely postpones
creating a working copy. As of this commit, it will also add
the dataset to a set of non-checkout datasets in the config,
so the dataset will stay out of the WC indefinitely.

Also adds `kart checkout --dataset=foo --not-dataset=bar` flags
to add and remove datasets from the set of non-checkout datasets.
olsen232 committed Oct 19, 2023
1 parent 2f66e97 commit 1872779
Showing 13 changed files with 303 additions and 33 deletions.
6 changes: 5 additions & 1 deletion kart/byod/point_cloud_import.py
@@ -25,7 +25,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
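
The hunk above (repeated in several import commands below) only shows the body of the option; the flag declaration itself sits above the visible context. For readers unfamiliar with Click's paired boolean flags, here is a minimal, self-contained sketch of the pattern such a flag typically uses — the `--checkout/--no-checkout` pairing, command name and output are illustrative assumptions, not copied from kart:

```python
import click

@click.command()
@click.option(
    "--checkout/--no-checkout",
    "do_checkout",  # the Python parameter the flag maps onto, as in the hunk above
    default=True,   # checking out remains the default behaviour
    help="Whether to check out the dataset once the import is finished.",
)
def demo_import(do_checkout):
    # In kart, do_checkout=False now also records the dataset as non-checkout in config.
    click.echo(f"do_checkout={do_checkout}")

if __name__ == "__main__":
    demo_import()  # try: python demo.py --no-checkout
```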
6 changes: 5 additions & 1 deletion kart/byod/raster_import.py
@@ -25,7 +25,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
93 changes: 91 additions & 2 deletions kart/checkout.py
@@ -50,6 +50,18 @@
type=SpatialFilterString(encoding="utf-8"),
help=spatial_filter_help_text(),
)
@click.option(
"--dataset",
"do_checkout_spec",
multiple=True,
help="Request that a particular dataset be checked out (one which is currently configured to not be checked out)",
)
@click.option(
"--not-dataset",
"non_checkout_spec",
multiple=True,
help="Request that a particular dataset *not* be checked out (one which is currently configured to be checked out)",
)
@click.argument("refish", default=None, required=False, shell_complete=ref_completer)
def checkout(
ctx,
@@ -58,6 +70,8 @@ def checkout(
discard_changes,
do_guess,
spatial_filter_spec,
do_checkout_spec,
non_checkout_spec,
refish,
):
"""Switch branches or restore working tree files"""
@@ -126,8 +140,38 @@ def checkout(
"The spatial filter has been updated in the config and no longer matches the working copy."
)

non_checkout_datasets = repo.non_checkout_datasets
if do_checkout_spec or non_checkout_spec:
do_checkout_spec = set(do_checkout_spec)
non_checkout_spec = set(non_checkout_spec)
_verify_checkout_datasets_spec(
repo,
commit,
refish,
do_checkout_spec,
non_checkout_spec,
non_checkout_datasets,
)
non_checkout_datasets = (
non_checkout_datasets | non_checkout_spec
) - do_checkout_spec

do_switch_checkout_datasets = not repo.working_copy.matches_non_checkout_datasets(
non_checkout_datasets
)

# Again, we also allow changing the set of checked-out / non-checked-out datasets just by
# writing it directly to the config and then running `kart checkout`, but using
# `kart checkout --dataset=foo --not-dataset=bar` is preferred.
if do_switch_checkout_datasets and not (do_checkout_spec or non_checkout_spec):
click.echo(
"The set of datasets to be checked out has been updated in the config and no longer matches the working copy."
)

discard_changes = discard_changes or force
if (do_switch_commit or do_switch_spatial_filter) and not discard_changes:
if (
do_switch_commit or do_switch_spatial_filter or do_switch_checkout_datasets
) and not discard_changes:
ctx.obj.check_not_dirty(help_message=_DISCARD_CHANGES_HELP_MESSAGE)

if new_branch and new_branch in repo.branches:
@@ -170,17 +214,37 @@ def checkout(
if spatial_filter_spec is not None:
spatial_filter_spec.write_config(repo, update_remote=promisor_remote)

if do_checkout_spec or non_checkout_spec:
repo.configure_do_checkout_datasets(do_checkout_spec, True)
repo.configure_do_checkout_datasets(non_checkout_spec, False)

TableWorkingCopy.ensure_config_exists(repo)
repo.set_head(head_ref)

repo_key_filter = (
RepoKeyFilter.exclude_datasets(non_checkout_datasets)
if non_checkout_datasets
else RepoKeyFilter.MATCH_ALL
)
parts_to_create = (
repo.datasets().working_copy_part_types() if not repo.head_is_unborn else ()
repo.datasets(repo_key_filter=repo_key_filter).working_copy_part_types()
if not repo.head_is_unborn
else ()
)

if do_switch_commit or do_switch_spatial_filter or discard_changes:
# Changing commit, changing spatial filter, or discarding changes mean we need to update every dataset:
repo.working_copy.reset_to_head(
rewrite_full=do_switch_spatial_filter,
create_parts_if_missing=parts_to_create,
non_checkout_datasets=non_checkout_datasets,
)
elif do_switch_checkout_datasets:
# Not doing any of the above - just need to change those datasets newly added / removed from the non_checkout_list.
repo.working_copy.reset_to_head(
non_checkout_datasets=non_checkout_datasets,
only_update_checkout_datasets=True,
create_parts_if_missing=parts_to_create,
)
elif parts_to_create:
# Possibly we needn't auto-create any working copy here at all, but lots of tests currently depend on it.
@@ -189,6 +253,31 @@
)


def _verify_checkout_datasets_spec(
repo, commit, refish, do_checkout_spec, non_checkout_spec, non_checkout_datasets
):
# Check the set of datasets that the user wants to check out / not check out, to make sure we've heard of them.
# (avoid the bad experience where the user disables check out of non-existing dataset "foo-bar" instead of "foo_bar").
if do_checkout_spec & non_checkout_spec:
bad_ds = next(iter(do_checkout_spec & non_checkout_spec))
raise click.BadParameter(
f"Dataset {bad_ds} should not be present in both --dataset and --not-dataset",
param_hint="dataset",
)
# Only datasets that are not already in the config are checked - if the user managed to mark it as non-checkout before,
# they can mark it as checkout now, even if we can't find it any more.
new_spec = (do_checkout_spec | non_checkout_spec) - non_checkout_datasets
if not new_spec:
return
datasets_at_commit = repo.datasets(commit)
for ds_path in new_spec:
if ds_path not in datasets_at_commit:
raise click.BadParameter(
f"No dataset {ds_path} at commit {refish or 'HEAD'}",
param_hint="dataset" if ds_path in do_checkout_spec else "not-dataset",
)


@functools.lru_cache()
def _git_fetch_supports_flag(repo, flag):
r = subprocess.run(
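
The core of the new behaviour in kart/checkout.py is plain set arithmetic: datasets named with `--not-dataset` join the configured non-checkout set, datasets named with `--dataset` leave it, and a path given to both is rejected. A minimal, dependency-free sketch of that merge (dataset names are hypothetical; the real command additionally checks newly named paths against the datasets present at the target commit):

```python
def update_non_checkout(current, do_checkout_spec, non_checkout_spec):
    """Mimic the merge performed in `checkout`: --not-dataset paths join the
    non-checkout set, --dataset paths leave it, overlaps are rejected."""
    do_checkout_spec = set(do_checkout_spec)
    non_checkout_spec = set(non_checkout_spec)
    overlap = do_checkout_spec & non_checkout_spec
    if overlap:
        raise ValueError(f"{overlap.pop()} given as both --dataset and --not-dataset")
    return (set(current) | non_checkout_spec) - do_checkout_spec

# `bar` is newly excluded, `foo` is re-enabled, `baz` stays excluded:
assert update_non_checkout({"foo", "baz"}, ["foo"], ["bar"]) == {"bar", "baz"}
```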
6 changes: 5 additions & 1 deletion kart/import_.py
@@ -51,7 +51,11 @@ def list_import_formats(ctx):
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--dataset-path", "--dataset", "ds_path", help="The dataset's path once imported"
6 changes: 5 additions & 1 deletion kart/point_cloud/import_.py
@@ -41,7 +41,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
6 changes: 5 additions & 1 deletion kart/raster/import_.py
@@ -41,7 +41,11 @@
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--replace-existing",
25 changes: 25 additions & 0 deletions kart/repo.py
@@ -577,6 +577,31 @@ def spatial_filter(self):

return SpatialFilter.from_repo_config(self)

def configure_do_checkout_datasets(self, dataset_paths, do_checkout):
for dataset_path in dataset_paths:
key = f"dataset.{dataset_path}.checkout"
if do_checkout:
# Checking out a dataset is the default, so we don't clutter the config with it.
self.del_config(key)
else:
# Specifically mark this dataset as do-not-checkout.
self.config[key] = False

@property
def non_checkout_datasets(self):
result = set()
config = self.config
for entry in config:
parts = entry.name.split(".")
if (
len(parts) == 3
and parts[0] == "dataset"
and parts[2] == "checkout"
and not config.get_bool(entry.name)
):
result.add(parts[1])
return result

def get_config_str(self, key, default=None):
return self.config[key] if key in self.config else default

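
As the repo.py hunk shows, the non-checkout set is persisted as per-dataset config entries of the form `dataset.<path>.checkout = false`, with the key simply deleted again when checkout is re-enabled. A rough stand-alone sketch of the same round trip, using a plain dict in place of the repo config (dataset names are made up):

```python
def non_checkout_datasets(config):
    """Collect dataset paths whose `dataset.<path>.checkout` entry is false,
    mirroring the property added to kart/repo.py."""
    result = set()
    for name, value in config.items():
        parts = name.split(".")
        if len(parts) == 3 and parts[0] == "dataset" and parts[2] == "checkout" and not value:
            result.add(parts[1])
    return result

config = {
    "dataset.buildings.checkout": False,  # e.g. imported with --no-checkout
    "core.bare": False,                   # unrelated entries are ignored
}
assert non_checkout_datasets(config) == {"buildings"}

# Re-enabling checkout corresponds to deleting the key, as configure_do_checkout_datasets does:
config.pop("dataset.buildings.checkout")
assert non_checkout_datasets(config) == set()
```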
7 changes: 6 additions & 1 deletion kart/tabular/import_.py
@@ -154,7 +154,11 @@ def any_at_all(iterable):
"do_checkout",
is_flag=True,
default=True,
help="Whether to create a working copy once the import is finished, if no working copy exists yet.",
help=(
"Whether to check out the dataset once the import is finished. If false, the dataset will be configured as "
"not being checked out and will never be written to the working copy, until this decision is reversed by "
"running `kart checkout --dataset=DATASET-PATH`."
),
)
@click.option(
"--num-workers",
@@ -346,6 +350,7 @@ def table_import(

# During imports we can keep old changes since they won't conflict with newly imported datasets.
parts_to_create = [PartType.TABULAR] if do_checkout else []
repo.configure_do_checkout_datasets(new_ds_paths, do_checkout)
repo.working_copy.reset_to_head(
repo_key_filter=RepoKeyFilter.datasets(new_ds_paths),
create_parts_if_missing=parts_to_create,
6 changes: 4 additions & 2 deletions kart/tabular/working_copy/base.py
@@ -500,7 +500,9 @@ def diff_dataset_to_working_copy(
return DatasetDiff()

feature_filter = ds_filter.get("feature", ds_filter.child_type())
with self.session():
with self.session() as sess:
if self._is_noncheckout_dataset(sess, dataset):
return DatasetDiff()
meta_diff = self.diff_dataset_to_working_copy_meta(dataset, raise_if_dirty)
feature_diff = self.diff_dataset_to_working_copy_feature(
dataset, feature_filter, meta_diff, raise_if_dirty
@@ -1181,12 +1183,12 @@ def _delete_meta(self, sess, dataset):

def _do_reset_datasets(
self,
*,
base_datasets,
target_datasets,
ds_inserts,
ds_updates,
ds_deletes,
*,
base_tree=None,
target_tree=None,
target_commit=None,
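
Aside from the non-checkout guard added to `diff_dataset_to_working_copy`, the only change to `_do_reset_datasets` here (and in kart/workdir.py below) is moving the bare `*` marker to the top of the signature, which makes every dataset argument keyword-only. A tiny illustration of what that changes for callers (simplified signatures, not kart's real ones):

```python
def reset_old(base_datasets, target_datasets, *, base_tree=None):
    return base_datasets, target_datasets, base_tree

def reset_new(*, base_datasets, target_datasets, base_tree=None):
    return base_datasets, target_datasets, base_tree

reset_old({"a"}, {"b"})                                 # positional call was allowed
reset_new(base_datasets={"a"}, target_datasets={"b"})   # now every argument must be named
try:
    reset_new({"a"}, {"b"})
except TypeError as exc:
    print(exc)  # reset_new() takes 0 positional arguments but 2 were given
```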
1 change: 1 addition & 0 deletions kart/tile/importer.py
@@ -333,6 +333,7 @@ def import_tiles(self):
self.repo.references[fast_import_on_branch].delete()

parts_to_create = [PartType.WORKDIR] if self.do_checkout else []
self.repo.configure_do_checkout_datasets([self.dataset_path], self.do_checkout)
# During imports we can keep old changes since they won't conflict with newly imported datasets.
self.repo.working_copy.reset_to_head(
repo_key_filter=RepoKeyFilter.datasets([self.dataset_path]),
2 changes: 1 addition & 1 deletion kart/workdir.py
@@ -290,12 +290,12 @@ def _is_head(self, commit_or_tree):

def _do_reset_datasets(
self,
*,
base_datasets,
target_datasets,
ds_inserts,
ds_updates,
ds_deletes,
*,
base_tree=None,
target_tree=None,
target_commit=None,
