diff --git a/CHANGELOG.md b/CHANGELOG.md index 407c05d6..3195b633 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where ## 0.14.3 (UNRELEASED) +- Adds support for disabling the working-copy checkout of specific datasets using the commands `kart import DATASET --no-checkout` or `kart checkout --not-dataset=DATASET`, and re-enabling it using `kart checkout --dataset=DATASET`. [#926](https://github.com/koordinates/kart/pull/926) - Adds information on referencing and citing Kart to `CITATION`. [#914](https://github.com/koordinates/kart/pull/914) - Fixes a bug where Kart would misidentify a non-Kart repo as a Kart V1 repo in some circumstances. [#918](https://github.com/koordinates/kart/issues/918) diff --git a/kart/byod/point_cloud_import.py b/kart/byod/point_cloud_import.py index ce344880..0d15a935 100644 --- a/kart/byod/point_cloud_import.py +++ b/kart/byod/point_cloud_import.py @@ -25,7 +25,11 @@ "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--replace-existing", diff --git a/kart/byod/raster_import.py b/kart/byod/raster_import.py index 9bf7ac8a..32b35a8e 100644 --- a/kart/byod/raster_import.py +++ b/kart/byod/raster_import.py @@ -25,7 +25,11 @@ "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--replace-existing", diff --git a/kart/checkout.py b/kart/checkout.py index a2522b3e..4f9278ea 100644 --- a/kart/checkout.py +++ b/kart/checkout.py @@ -50,6 +50,18 @@ type=SpatialFilterString(encoding="utf-8"), help=spatial_filter_help_text(), ) +@click.option( + "--dataset", + "do_checkout_spec", + multiple=True, + help="Request that a particular dataset be checked out (one which is currently configured to not be checked out)", +) +@click.option( + "--not-dataset", + "non_checkout_spec", + multiple=True, + help="Request that a particular dataset *not* be checked out (one which is currently configured to be checked out)", +) @click.argument("refish", default=None, required=False, shell_complete=ref_completer) def checkout( ctx, @@ -58,6 +70,8 @@ def checkout( discard_changes, do_guess, spatial_filter_spec, + do_checkout_spec, + non_checkout_spec, refish, ): """Switch branches or restore working tree files""" @@ -126,8 +140,38 @@ def checkout( "The spatial filter has been updated in the config and no longer matches the working copy." 
) + non_checkout_datasets = repo.non_checkout_datasets + if do_checkout_spec or non_checkout_spec: + do_checkout_spec = set(do_checkout_spec) + non_checkout_spec = set(non_checkout_spec) + _verify_checkout_datasets_spec( + repo, + commit, + refish, + do_checkout_spec, + non_checkout_spec, + non_checkout_datasets, + ) + non_checkout_datasets = ( + non_checkout_datasets | non_checkout_spec + ) - do_checkout_spec + + do_switch_checkout_datasets = not repo.working_copy.matches_non_checkout_datasets( + non_checkout_datasets + ) + + # Again, we also allow switching the set of checked-out / non-checked-out datasets just by + # writing it directly to the config and then running `kart checkout`, but using + # `kart checkout --dataset=foo --not-dataset=bar` is preferred. + if do_switch_checkout_datasets and not (do_checkout_spec or non_checkout_spec): + click.echo( + "The set of datasets to be checked out has been updated in the config and no longer matches the working copy." + ) + discard_changes = discard_changes or force - if (do_switch_commit or do_switch_spatial_filter) and not discard_changes: + if ( + do_switch_commit or do_switch_spatial_filter or do_switch_checkout_datasets + ) and not discard_changes: ctx.obj.check_not_dirty(help_message=_DISCARD_CHANGES_HELP_MESSAGE) if new_branch and new_branch in repo.branches: @@ -170,17 +214,37 @@ def checkout( if spatial_filter_spec is not None: spatial_filter_spec.write_config(repo, update_remote=promisor_remote) + if do_checkout_spec or non_checkout_spec: + repo.configure_do_checkout_datasets(do_checkout_spec, True) + repo.configure_do_checkout_datasets(non_checkout_spec, False) + TableWorkingCopy.ensure_config_exists(repo) repo.set_head(head_ref) + repo_key_filter = ( + RepoKeyFilter.exclude_datasets(non_checkout_datasets) + if non_checkout_datasets + else RepoKeyFilter.MATCH_ALL + ) parts_to_create = ( - repo.datasets().working_copy_part_types() if not repo.head_is_unborn else () + repo.datasets(repo_key_filter=repo_key_filter).working_copy_part_types() + if not repo.head_is_unborn + else () ) if do_switch_commit or do_switch_spatial_filter or discard_changes: + # Changing commit, changing spatial filter, or discarding changes means we need to update every dataset: repo.working_copy.reset_to_head( rewrite_full=do_switch_spatial_filter, create_parts_if_missing=parts_to_create, + non_checkout_datasets=non_checkout_datasets, + ) + elif do_switch_checkout_datasets: + # Not doing any of the above - we just need to update those datasets newly added to / removed from the non-checkout list. + repo.working_copy.reset_to_head( + non_checkout_datasets=non_checkout_datasets, + only_update_checkout_datasets=True, + create_parts_if_missing=parts_to_create, ) elif parts_to_create: # Possibly we needn't auto-create any working copy here at all, but lots of tests currently depend on it. @@ -189,6 +253,31 @@ def checkout( ) + +def _verify_checkout_datasets_spec( + repo, commit, refish, do_checkout_spec, non_checkout_spec, non_checkout_datasets +): + # Check the set of datasets that the user wants to check out / not check out, to make sure we've heard of them. + # (This avoids the bad experience where the user disables checkout of a non-existent dataset "foo-bar" instead of "foo_bar".)
+ if do_checkout_spec & non_checkout_spec: + bad_ds = next(iter(do_checkout_spec & non_checkout_spec)) + raise click.BadParameter( + f"Dataset {bad_ds} should not be present in both --dataset and --not-dataset", + param_hint="dataset", + ) + # Only datasets that are not already in the config are checked - if the user managed to mark it as non-checkout before, + # they can mark it as checkout now, even if we can't find it any more. + new_spec = (do_checkout_spec | non_checkout_spec) - non_checkout_datasets + if not new_spec: + return + datasets_at_commit = repo.datasets(commit) + for ds_path in new_spec: + if ds_path not in datasets_at_commit: + raise click.BadParameter( + f"No dataset {ds_path} at commit {refish or 'HEAD'}", + param_hint="dataset" if ds_path in do_checkout_spec else "not-dataset", + ) + + @functools.lru_cache() def _git_fetch_supports_flag(repo, flag): r = subprocess.run( diff --git a/kart/import_.py b/kart/import_.py index a95ba779..3eed33f9 100644 --- a/kart/import_.py +++ b/kart/import_.py @@ -51,7 +51,11 @@ def list_import_formats(ctx): "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--dataset-path", "--dataset", "ds_path", help="The dataset's path once imported" diff --git a/kart/point_cloud/import_.py b/kart/point_cloud/import_.py index 0ba90341..82f4fc2b 100644 --- a/kart/point_cloud/import_.py +++ b/kart/point_cloud/import_.py @@ -41,7 +41,11 @@ "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--replace-existing", diff --git a/kart/raster/import_.py b/kart/raster/import_.py index 39ea6de0..b50f4097 100644 --- a/kart/raster/import_.py +++ b/kart/raster/import_.py @@ -41,7 +41,11 @@ "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--replace-existing", diff --git a/kart/repo.py b/kart/repo.py index 6232a63a..0f06d711 100644 --- a/kart/repo.py +++ b/kart/repo.py @@ -577,6 +577,35 @@ def spatial_filter(self): return SpatialFilter.from_repo_config(self) + def configure_do_checkout_datasets(self, dataset_paths, do_checkout): + for dataset_path in dataset_paths: + key = f"dataset.{dataset_path}.checkout" + if do_checkout: + # Checking out a dataset is the default, we don't clutter the config with it. + self.del_config(key) + else: + # Specifically mark this dataset as do-not-checkout. 
+ self.config[key] = False + + @property + def non_checkout_datasets(self): + result = set() + config = self.config + for entry in config: + parts = entry.name.split(".", maxsplit=3) + if len(parts) > 3: + # Handle a name-containing-dots ie "dataset.NAME.CONTAINING.DOTS.checkout" + prefix, rest = entry.name.split(".", maxsplit=1) + parts = [prefix, *rest.rsplit(".", maxsplit=1)] + if ( + len(parts) == 3 + and parts[0] == "dataset" + and parts[2] == "checkout" + and not config.get_bool(entry.name) + ): + result.add(parts[1]) + return result + def get_config_str(self, key, default=None): return self.config[key] if key in self.config else default diff --git a/kart/structure.py b/kart/structure.py index b4a1fa1d..1ae947de 100644 --- a/kart/structure.py +++ b/kart/structure.py @@ -468,6 +468,9 @@ def __init__( self.filter_dataset_type = filter_dataset_type self.force_dataset_class = force_dataset_class + def __contains__(self, ds_path): + return self.get(ds_path) is not None + def __getitem__(self, ds_path): """Get a specific dataset by path.""" result = self.get(ds_path) diff --git a/kart/tabular/import_.py b/kart/tabular/import_.py index 418449d2..b9bcb660 100644 --- a/kart/tabular/import_.py +++ b/kart/tabular/import_.py @@ -154,7 +154,11 @@ def any_at_all(iterable): "do_checkout", is_flag=True, default=True, - help="Whether to create a working copy once the import is finished, if no working copy exists yet.", + help=( + "Whether to check out the dataset once the import is finished. If false, the dataset will be configured as " + "not being checked out and will never be written to the working copy, until this decision is reversed by " + "running `kart checkout --dataset=DATASET-PATH`." + ), ) @click.option( "--num-workers", @@ -346,6 +350,7 @@ def table_import( # During imports we can keep old changes since they won't conflict with newly imported datasets. parts_to_create = [PartType.TABULAR] if do_checkout else [] + repo.configure_do_checkout_datasets(new_ds_paths, do_checkout) repo.working_copy.reset_to_head( repo_key_filter=RepoKeyFilter.datasets(new_ds_paths), create_parts_if_missing=parts_to_create, diff --git a/kart/tabular/working_copy/base.py b/kart/tabular/working_copy/base.py index f52b948a..8fe9a869 100644 --- a/kart/tabular/working_copy/base.py +++ b/kart/tabular/working_copy/base.py @@ -500,7 +500,9 @@ def diff_dataset_to_working_copy( return DatasetDiff() feature_filter = ds_filter.get("feature", ds_filter.child_type()) - with self.session(): + with self.session() as sess: + if self._is_noncheckout_dataset(sess, dataset): + return DatasetDiff() meta_diff = self.diff_dataset_to_working_copy_meta(dataset, raise_if_dirty) feature_diff = self.diff_dataset_to_working_copy_feature( dataset, feature_filter, meta_diff, raise_if_dirty @@ -1181,12 +1183,12 @@ def _delete_meta(self, sess, dataset): def _do_reset_datasets( self, + *, base_datasets, target_datasets, ds_inserts, ds_updates, ds_deletes, - *, base_tree=None, target_tree=None, target_commit=None, diff --git a/kart/tile/importer.py b/kart/tile/importer.py index 6d70ecfb..e4e7dca1 100644 --- a/kart/tile/importer.py +++ b/kart/tile/importer.py @@ -333,6 +333,7 @@ def import_tiles(self): self.repo.references[fast_import_on_branch].delete() parts_to_create = [PartType.WORKDIR] if self.do_checkout else [] + self.repo.configure_do_checkout_datasets([self.dataset_path], self.do_checkout) # During imports we can keep old changes since they won't conflict with newly imported datasets. 
self.repo.working_copy.reset_to_head( repo_key_filter=RepoKeyFilter.datasets([self.dataset_path]), diff --git a/kart/tile/tile_dataset.py b/kart/tile/tile_dataset.py index ed864a0d..f5d0bcc1 100644 --- a/kart/tile/tile_dataset.py +++ b/kart/tile/tile_dataset.py @@ -395,6 +395,11 @@ def diff_to_working_copy( The resulting diffs are missing almost all of the info about the new tiles, but this is faster and more reliable if this information is not needed. """ + workdir = self.repo.working_copy.workdir + with workdir.state_session() as sess: + if workdir._is_noncheckout_dataset(sess, self.path): + return DatasetDiff() + tile_filter = ds_filter.get("tile", ds_filter.child_type()) current_metadata = self.tile_metadata @@ -684,7 +689,6 @@ def apply_tile_diff( """ with object_builder.chdir(self.inner_path): for delta in tile_diff.values(): - if delta.type in ("insert", "update"): new_val = delta.new_value name = new_val.get("name") diff --git a/kart/workdir.py b/kart/workdir.py index 1de31b4f..a0e0403a 100644 --- a/kart/workdir.py +++ b/kart/workdir.py @@ -290,12 +290,12 @@ def _is_head(self, commit_or_tree): def _do_reset_datasets( self, + *, base_datasets, target_datasets, ds_inserts, ds_updates, ds_deletes, - *, base_tree=None, target_tree=None, target_commit=None, diff --git a/kart/working_copy.py b/kart/working_copy.py index 0ed276c2..63e1756e 100644 --- a/kart/working_copy.py +++ b/kart/working_copy.py @@ -248,20 +248,24 @@ def reset_to_head( self, *, create_parts_if_missing=(), - quiet=False, repo_key_filter=RepoKeyFilter.MATCH_ALL, track_changes_as_dirty=False, rewrite_full=False, + non_checkout_datasets=None, + only_update_checkout_datasets=False, + quiet=False, ): """Reset all working copy parts to the head commit. See reset() below.""" self.reset( self.repo.head_commit, create_parts_if_missing=create_parts_if_missing, - quiet=quiet, repo_key_filter=repo_key_filter, track_changes_as_dirty=track_changes_as_dirty, rewrite_full=rewrite_full, + non_checkout_datasets=non_checkout_datasets, + only_update_checkout_datasets=only_update_checkout_datasets, + quiet=quiet, ) def reset( @@ -269,10 +273,12 @@ def reset( commit_or_tree, *, create_parts_if_missing=(), - quiet=False, repo_key_filter=RepoKeyFilter.MATCH_ALL, track_changes_as_dirty=False, rewrite_full=False, + non_checkout_datasets=None, + only_update_checkout_datasets=False, + quiet=False, ): """ Resets the working copy to the given target-commit (or target-tree). @@ -290,6 +296,13 @@ def reset( present at commit_or_tree will be written from scratch using write_full. Since write_full honours the current repo spatial filter, this also ensures that the working copy spatial filter is up to date. + + non_checkout_datasets is the set of datasets that the user has configured not to be checked out - loaded + from repo.non_checkout_datasets. (Supplied as an argument only to avoid reloading it from the config). + + If only_update_checkout_datasets is True, then only those datasets which have recently moved into or out of + repo.non_checkout_datasets will be updated (ie, fully-written or deleted). Each dataset part independently tracks + what the set of non_checkout_datasets were at last call to reset(), so each part handles this independently. 
""" created_parts = () @@ -313,6 +326,8 @@ def reset( repo_key_filter=repo_key_filter, track_changes_as_dirty=track_changes_as_dirty, rewrite_full=rewrite_full, + non_checkout_datasets=non_checkout_datasets, + only_update_checkout_datasets=only_update_checkout_datasets, quiet=quiet, ) @@ -320,10 +335,10 @@ def soft_reset_after_commit( self, commit_or_tree, *, - quiet=False, mark_as_clean=None, now_outside_spatial_filter=None, committed_diff=None, + quiet=False, ): """ Like a reset, this marks the working copy as now being based on the given target-tree (or the tree in the given @@ -365,6 +380,12 @@ def matches_spatial_filter_hash(self, spatial_filter_hash): return False return True + def matches_non_checkout_datasets(self, non_checkout_datasets): + for p in self.parts(): + if p.get_non_checkout_datasets() != non_checkout_datasets: + return False + return True + def parts_status(self): from kart.sqlalchemy import DbType @@ -449,6 +470,18 @@ def get_spatial_filter_hash(self): """Returns the spatial filter hash from the state table.""" return self.get_kart_state_value("*", "spatial-filter-hash") + def get_non_checkout_datasets(self): + kart_state = self.kart_tables.kart_state + with self.state_session() as sess: + r = sess.execute( + sa.select([kart_state.c.table_name]).where( + sa.and_( + kart_state.c.key == "checkout", kart_state.c.value == "false" + ) + ) + ) + return set(row[0] for row in r) + def get_kart_state_value(self, table_name, key): """Looks up a value from the kart-state table.""" kart_state = self.kart_tables.kart_state @@ -504,6 +537,30 @@ def _update_state_table_spatial_filter_hash(self, sess, spatial_filter_hash): ) return r.rowcount + def _update_state_table_non_checkout_datasets(self, sess, non_checkout_datasets): + kart_state = self.kart_tables.kart_state + sess.execute(sa.delete(kart_state).where(kart_state.c.key == "checkout")) + if non_checkout_datasets: + sess.execute( + kart_state.insert(), + [ + {"table_name": ds_path, "key": "checkout", "value": "false"} + for ds_path in sorted(non_checkout_datasets) + ], + ) + + def _is_noncheckout_dataset(self, sess, dataset): + dataset = dataset.path if hasattr(dataset, "path") else str(dataset) + kart_state = self.kart_tables.kart_state + value = sess.scalar( + sa.select([kart_state.c.value]).where( + sa.and_( + kart_state.c.table_name == dataset, kart_state.c.key == "checkout" + ) + ) + ) + return value == "false" + def reset( self, commit_or_tree, @@ -511,6 +568,8 @@ def reset( repo_key_filter=RepoKeyFilter.MATCH_ALL, track_changes_as_dirty=False, rewrite_full=False, + non_checkout_datasets=None, + only_update_checkout_datasets=False, quiet=False, ): """ @@ -575,6 +634,14 @@ def reset( ds_deletes = base_datasets.keys() - target_datasets.keys() ds_updates = base_datasets.keys() & target_datasets.keys() + self._handle_non_checkout_dataset_changes( + ds_inserts=ds_inserts, + ds_deletes=ds_deletes, + ds_updates=ds_updates, + non_checkout_datasets=non_checkout_datasets, + only_update_checkout_datasets=only_update_checkout_datasets, + ) + if rewrite_full: # No updates are "supported" since we are rewriting everything. 
ds_updates_unsupported = set(ds_updates) @@ -607,11 +674,11 @@ def reset( with session_context(): if ds_inserts or ds_updates or ds_deletes: self._do_reset_datasets( - base_datasets, - target_datasets, - ds_inserts, - ds_updates, - ds_deletes, + base_datasets=base_datasets, + target_datasets=target_datasets, + ds_inserts=ds_inserts, + ds_deletes=ds_deletes, + ds_updates=ds_updates, base_tree=base_tree, target_tree=target_tree, target_commit=target_commit, @@ -626,15 +693,18 @@ def reset( self._update_state_table_spatial_filter_hash( sess, self.repo.spatial_filter.hexhash ) + self._update_state_table_non_checkout_datasets( + sess, non_checkout_datasets + ) def _do_reset_datasets( self, + *, base_datasets, target_datasets, ds_inserts, ds_updates, ds_deletes, - *, base_tree=None, target_tree=None, target_commit=None, @@ -717,6 +787,52 @@ def _check_for_unsupported_structural_changes( f" Unfilterable structural changes are affecting:\n{unsupported_filters}" ) + def _handle_non_checkout_dataset_changes( + self, + *, + ds_inserts, + ds_deletes, + ds_updates, + non_checkout_datasets, + only_update_checkout_datasets, + ): + """ + Modify the planned list of datasets to create, delete, and update in the event that certain datasets + have recently been moved into or out of the set of non_checkout_datasets. + """ + + # Current set of non_checkout_datasets as requested by the caller. + new_set = non_checkout_datasets or set() + # The value of repo.non_checkout_datasets at the last reset(), as stored in the state table: + old_set = self.get_non_checkout_datasets() + + # This might look a bit backwards - these are sets of things *not* to check out: + # we need to insert a dataset if it was in the old set but not in the new set. + ds_inserts_due_to_config_changes = old_set - new_set + ds_deletes_due_to_config_changes = new_set - old_set + + # We don't add anything to any of (ds_inserts, ds_deletes, ds_updates) if it is not already present in + # at least one of those lists, since this indicates it is not currently relevant to this working copy part + # (ie, the wrong type of dataset, or non-existent at the current / previous commit). + + # Insert (rather than update) a dataset if it is newly removed from the no-checkout list. + ds_inserts |= ds_inserts_due_to_config_changes & ds_updates + ds_inserts -= ds_deletes_due_to_config_changes + + # Delete (rather than update) a dataset if it is newly added to the no-checkout list. + ds_deletes |= ds_deletes_due_to_config_changes & ds_updates + ds_deletes -= ds_inserts_due_to_config_changes + + # We can only update a dataset if it was already checked out and still will be checked out. + # That means if it is or was on the list of non_checkout_datasets, it shouldn't be on our update list.
+ ds_updates -= old_set + ds_updates -= new_set + + if only_update_checkout_datasets: + ds_inserts &= ds_inserts_due_to_config_changes + ds_deletes &= ds_deletes_due_to_config_changes + ds_updates.clear() + def handle_working_copy_tree_mismatch(wc_type_name, actual_tree_id, expected_tree_id): actual_tree_id = f"tree {actual_tree_id}" if actual_tree_id else "the empty tree" diff --git a/tests/byod/test_imports.py b/tests/byod/test_imports.py index 1fe94e93..8433dfcf 100644 --- a/tests/byod/test_imports.py +++ b/tests/byod/test_imports.py @@ -13,12 +13,10 @@ def test_byod_point_cloud_import( cli_runner, s3_test_data_point_cloud, check_lfs_hashes, + check_tile_is_reflinked, ): repo_path = tmp_path / "point-cloud-repo" - # Initing using --bare prevents the tiles from being fetched immediately. - # TODO: we need to make it configurable whether tiles for a dataset (particularly a BYOD dataset) - # are fetched or not, ie, support a per-dataset no-checkout flag. - r = cli_runner.invoke(["init", repo_path, "--bare"]) + r = cli_runner.invoke(["init", repo_path]) assert r.exit_code == 0 with chdir(repo_path): @@ -27,6 +25,7 @@ def test_byod_point_cloud_import( "byod-point-cloud-import", s3_test_data_point_cloud, "--dataset-path=auckland", + "--no-checkout", ] ) assert r.exit_code == 0, r.stderr @@ -86,9 +85,18 @@ def test_byod_point_cloud_import( "06bd15fbb6616cf63a4a410c5ba4666dab76177a58cb99c3fa2afb46c9dd6379 (f9ad3012492840d3c51b9b029a81c1cdbb11eef2) → s3://kart-bring-your-own-data-poc/auckland-small-laz1.2/auckland_1_3.laz", ] - r = cli_runner.invoke(["lfs+", "fetch"]) + r = cli_runner.invoke(["checkout", "--dataset=auckland"]) assert r.exit_code == 0, r.stderr + repo = KartRepo(repo_path) + check_lfs_hashes(repo, expected_file_count=16) + for x in range(4): + for y in range(4): + assert (repo_path / "auckland" / f"auckland_{x}_{y}.laz").is_file() + check_tile_is_reflinked( + repo_path / "auckland" / f"auckland_{x}_{y}.laz", repo + ) + r = cli_runner.invoke(["lfs+", "fetch", "--dry-run"]) assert r.exit_code == 0, r.stderr assert r.stdout.splitlines() == [ @@ -96,8 +104,6 @@ def test_byod_point_cloud_import( " Found nothing to fetch", ] - check_lfs_hashes(KartRepo(repo_path), expected_file_count=16) - @pytest.mark.slow def test_byod_raster_import( @@ -106,10 +112,10 @@ def test_byod_raster_import( cli_runner, s3_test_data_raster, check_lfs_hashes, + check_tile_is_reflinked, ): repo_path = tmp_path / "point-cloud-repo" - # TODO: support a per-dataset no-checkout flag. 
- r = cli_runner.invoke(["init", repo_path, "--bare"]) + r = cli_runner.invoke(["init", repo_path]) assert r.exit_code == 0 with chdir(repo_path): @@ -118,6 +124,7 @@ def test_byod_raster_import( "byod-raster-import", s3_test_data_raster, "--dataset-path=erorisk_si", + "--no-checkout", ] ) assert r.exit_code == 0, r.stderr @@ -177,14 +184,18 @@ def test_byod_raster_import( "d8f514e654a81bdcd7428886a15e300c56b5a5ff92898315d16757562d2968ca (5f50b7e893da8782d5877177fab2e9a3b20fa9dc) → s3://kart-bring-your-own-data-poc/erorisk_si/erorisk_silcdb4.tif.aux.xml", ] - r = cli_runner.invoke(["lfs+", "fetch"]) + r = cli_runner.invoke(["checkout", "--dataset=erorisk_si"]) assert r.exit_code == 0, r.stderr + repo = KartRepo(repo_path) + check_lfs_hashes(repo, expected_file_count=2) + for file in ("erorisk_silcdb4.tif", "erorisk_silcdb4.tif.aux.xml"): + assert (repo_path / "erorisk_si" / file).is_file() + check_tile_is_reflinked(repo_path / "erorisk_si" / file, repo) + r = cli_runner.invoke(["lfs+", "fetch", "--dry-run"]) assert r.exit_code == 0, r.stderr assert r.stdout.splitlines() == [ "Running fetch with --dry-run:", " Found nothing to fetch", ] - - check_lfs_hashes(KartRepo(repo_path), expected_file_count=2) diff --git a/tests/test_checkout.py b/tests/test_checkout.py index e133798a..0fe0e58a 100644 --- a/tests/test_checkout.py +++ b/tests/test_checkout.py @@ -18,7 +18,6 @@ ) def test_checkout_branches(data_archive, cli_runner, chdir, tmp_path, working_copy): with data_archive("points") as remote_path: - r = cli_runner.invoke(["checkout", "-b", "one"]) assert r.exit_code == 0, r.stderr r = cli_runner.invoke(["checkout", "-b", "two", "HEAD^"]) @@ -47,7 +46,6 @@ def test_checkout_branches(data_archive, cli_runner, chdir, tmp_path, working_co head = CommitWithReference.resolve(repo, "HEAD") with chdir(tmp_path): - r = cli_runner.invoke(["branch"]) assert r.exit_code == 0, r.stderr assert r.stdout.splitlines() == ["* four"] @@ -126,3 +124,50 @@ def test_reset(data_working_copy, cli_runner, edit_points): assert r.stdout.splitlines() == [ f"{H.POINTS.HEAD1_SHA} (HEAD -> main) Import from nz-pa-points-topo-150k.gpkg", ] + + +def _check_workingcopy_contains_tables(repo, expected_tables): + with repo.working_copy.tabular.session() as sess: + r = sess.execute("""SELECT name FROM sqlite_master SM WHERE type='table';""") + sqlite_table_names = set(row[0] for row in r) + + census_tables = set(t for t in sqlite_table_names if t.startswith("census")) + assert census_tables == expected_tables + + r = sess.execute("""SELECT table_name FROM gpkg_contents;""") + gpkg_contents_table_names = set(row[0] for row in r) + assert gpkg_contents_table_names == expected_tables + + +def test_non_checkout_datasets(data_working_copy, cli_runner): + with data_working_copy("au-census") as (repo_path, wc): + repo = KartRepo(repo_path) + _check_workingcopy_contains_tables( + repo, {"census2016_sdhca_ot_sos_short", "census2016_sdhca_ot_ra_short"} + ) + + r = cli_runner.invoke( + ["checkout", "--not-dataset=census2016-sdhca-ot-sos-short"] + ) + assert r.exit_code == 2 + assert "No dataset census2016-sdhca-ot-sos-short" in r.stderr + + r = cli_runner.invoke( + ["checkout", "--not-dataset=census2016_sdhca_ot_sos_short"] + ) + assert r.exit_code == 0 + + _check_workingcopy_contains_tables(repo, {"census2016_sdhca_ot_ra_short"}) + + # No WC changes are returned. 
+ r = cli_runner.invoke(["diff", "--exit-code"]) + assert r.exit_code == 0 + + r = cli_runner.invoke( + ["checkout", "main", "--dataset=census2016_sdhca_ot_sos_short"] + ) + assert r.exit_code == 0 + + _check_workingcopy_contains_tables( + repo, {"census2016_sdhca_ot_sos_short", "census2016_sdhca_ot_ra_short"} + )
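
End to end, the feature is driven by `kart import ... --no-checkout` (and the tile/BYOD import variants), `kart checkout --not-dataset=DATASET` to disable checkout of an existing dataset, and `kart checkout --dataset=DATASET` to re-enable it, as exercised in the tests above. As a minimal standalone sketch (not the actual implementation), the `dataset.<path>.checkout` config keys resolve to the set of non-checkout datasets roughly as in `KartRepo.non_checkout_datasets`; here plain `(name, value)` tuples stand in for real pygit2 config entries and a string comparison stands in for `config.get_bool()`, so treat it as illustrative only.

```python
# Illustrative sketch only - mirrors the config-name parsing in KartRepo.non_checkout_datasets.
# Assumption: plain (name, value) tuples stand in for pygit2 config entries, and a string
# comparison stands in for config.get_bool().

def non_checkout_datasets(config_entries):
    """Return the dataset paths whose `dataset.<path>.checkout` config value is false."""
    result = set()
    for name, value in config_entries:
        parts = name.split(".", maxsplit=3)
        if len(parts) > 3:
            # Dataset paths may themselves contain dots, so keep only the leading "dataset"
            # and trailing "checkout" components and treat everything in between as the path.
            prefix, rest = name.split(".", maxsplit=1)
            parts = [prefix, *rest.rsplit(".", maxsplit=1)]
        if (
            len(parts) == 3
            and parts[0] == "dataset"
            and parts[2] == "checkout"
            and str(value).lower() in ("false", "0", "no", "off")
        ):
            result.add(parts[1])
    return result


# Only datasets explicitly marked checkout=false are excluded from the working copy.
entries = [
    ("dataset.auckland.checkout", "false"),
    ("dataset.erorisk_si.checkout", "true"),
    ("dataset.nz.pa.points.checkout", "false"),  # dataset path containing dots
]
assert non_checkout_datasets(entries) == {"auckland", "nz.pa.points"}
```

Keeping the flag in repo config and echoing it into each working-copy part's kart-state table (as `checkout` = `false` rows) is what lets `reset()` compare the old and new non-checkout sets and write or delete just the datasets that moved between them.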