diff --git a/CHANGELOG.md b/CHANGELOG.md index 37987f6b..f036eeeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Please note that compatibility for 0.x releases (software or repositories) isn't _When adding new entries to the changelog, please include issue/PR numbers wherever possible._ +## 0.15.1 (UNRELEASED) + +- Prevented committing local changes to linked datasets. [#953](https://github.com/koordinates/kart/pull/953) + ## 0.15.0 ### Major changes diff --git a/docs/pages/s3.rst b/docs/pages/s3.rst index 95ec132d..3a792286 100644 --- a/docs/pages/s3.rst +++ b/docs/pages/s3.rst @@ -71,9 +71,8 @@ Kart uses a standard AWS SDK to fetch data from S3. AWS credentials are loaded f Editing Tiles ^^^^^^^^^^^^^ -Currently, Kart does not write to S3 on the user's behalf for any reason. Any edits made to the linked dataset will be stored -in Kart, and will work the same as in any other dataset - the modified tiles will not be linked to any particular URL, and they -will not be written back to S3. +Currently, Kart does not write to S3 on the user's behalf for any reason. This means linked datasets cannot be edited by modifying +the working copy - committing such changes is prevented. Users may opt to write the required changes to S3 themselves, at which point they can use the ``kart import --replace-existing --link`` command to create a new version of the linked dataset. However, when doing so, take care not to overwrite any of the original tiles diff --git a/kart/base_diff_writer.py b/kart/base_diff_writer.py index afe6c23a..e594a218 100644 --- a/kart/base_diff_writer.py +++ b/kart/base_diff_writer.py @@ -13,7 +13,6 @@ from kart.exceptions import CrsError, InvalidOperation from kart.key_filters import RepoKeyFilter from kart import list_of_conflicts -from kart.list_of_conflicts import ListOfConflicts from kart.promisor_utils import FetchPromisedBlobsProcess, object_is_promised from kart.repo import KartRepoState from kart.spatial_filter import SpatialFilter @@ -83,7 +82,7 @@ def __init__( # used by json-lines diffs only diff_estimate_accuracy=None, # used by html diff only - html_template=None + html_template=None, ): self.repo = repo self.commit_spec = commit_spec @@ -115,6 +114,7 @@ def __init__( self.spatial_filter_conflicts = RepoKeyFilter() self.list_of_conflicts_warnings = [] + self.linked_dataset_changes = set() self.output_path = self._check_output_path( repo, self._normalize_output_path(output_path) @@ -226,6 +226,7 @@ def write_warnings_footer(self): """For writing any footer that is not part of the diff itself. Generally just writes warnings to stderr.""" self.write_spatial_filter_conflicts_warning_footer() self.write_list_of_conflicts_warning_footer() + self.write_linked_dataset_changes_warning_footer() def write_spatial_filter_conflicts_warning_footer(self): """ @@ -288,6 +289,23 @@ def write_list_of_conflicts_warning_footer(self): for warning in self.list_of_conflicts_warnings: click.echo(f" {warning}", err=True) + def write_linked_dataset_changes_warning_footer(self): + if not self.linked_dataset_changes: + return + click.secho( + "Warning: changes to linked datasets cannot be committed.", + bold=True, + err=True, + ) + click.echo( + "To update a linked dataset, re-import from the source with both --link and --replace-existing.\n" + "To discard these changes, use `kart reset --discard-changes`.", + err=True, + ) + click.echo("Linked datasets with uncommitted changes:", err=True) + for ds_path in sorted(self.linked_dataset_changes): + click.echo(f" {ds_path}", err=True) + def write_diff(self, diff_format=DiffFormat.FULL): """Default implementation for writing a diff. Subclasses can override.""" # Entered when -o is text @@ -322,6 +340,8 @@ def write_ds_diff_for_path(self, ds_path, diff_format=DiffFormat.FULL): list_of_conflicts.extract_error_messages_from_dataset_diff( ds_path, ds_diff, self.list_of_conflicts_warnings ) + if self.include_wc_diff: + self._check_for_linked_dataset_changes(ds_path, ds_diff) self.write_ds_diff(ds_path, ds_diff, diff_format=diff_format) return has_changes @@ -386,6 +406,9 @@ def get_repo_diff(self, include_files=True, diff_format=DiffFormat.FULL): list_of_conflicts.extract_error_messages_from_repo_diff( repo_diff, self.list_of_conflicts_warnings ) + if self.include_wc_diff: + for ds_path, ds_diff in repo_diff.items(): + self._check_for_linked_dataset_changes(ds_path, ds_diff) return repo_diff def get_dataset_diff(self, ds_path, diff_format=DiffFormat.FULL): @@ -707,6 +730,11 @@ def _get_old_or_new_dataset(self, ds_path): dataset = self.target_rs.datasets().get(ds_path) return dataset + def _check_for_linked_dataset_changes(self, ds_path, ds_diff): + dataset = self._get_old_or_new_dataset(ds_path) + if dataset and dataset.get_meta_item("linked-storage.json") and ds_diff: + self.linked_dataset_changes.add(ds_path) + class FeatureDeltaFetcher: """ diff --git a/kart/commit.py b/kart/commit.py index 3a1b7cd9..f3b3b772 100644 --- a/kart/commit.py +++ b/kart/commit.py @@ -169,6 +169,9 @@ def commit( "(this will overwrite some existing features that are outside of the current spatial filter)", exit_code=SPATIAL_FILTER_CONFLICT, ) + if commit_diff_writer.linked_dataset_changes: + commit_diff_writer.write_warnings_footer() + raise InvalidOperation("Aborting commit due to changes to linked datasets.") do_json = output_format == "json" commit_msg = None diff --git a/tests/data/linked-dataset-with-tiles.tgz b/tests/data/linked-dataset-with-tiles.tgz new file mode 100644 index 00000000..4832da44 Binary files /dev/null and b/tests/data/linked-dataset-with-tiles.tgz differ diff --git a/tests/linked_storage/test_workingcopy.py b/tests/linked_storage/test_workingcopy.py new file mode 100644 index 00000000..e03d03df --- /dev/null +++ b/tests/linked_storage/test_workingcopy.py @@ -0,0 +1,41 @@ +from kart.repo import KartRepo +from kart.exceptions import INVALID_OPERATION + + +def test_read_only_linked_datasets( + data_archive, + cli_runner, + check_lfs_hashes, + check_tile_is_reflinked, +): + # Currently, we don't allow users to edit linked datasets except by doing a full linked re-import. + # This avoids a confusing situation where linked datasets are gradually replaced with unlinked tiles sourced locally. + with data_archive("linked-dataset-with-tiles") as repo_path: + r = cli_runner.invoke(["lfs+", "fetch", "HEAD", "--dry-run"]) + assert r.exit_code == 0, r.stderr + assert r.stdout.splitlines() == [ + "Running fetch with --dry-run:", + " Found nothing to fetch", + ] + repo = KartRepo(repo_path) + check_lfs_hashes(repo, expected_file_count=16) + + r = cli_runner.invoke(["diff", "--exit-code"]) + assert r.exit_code == 0 + + (repo_path / "auckland" / "auckland_0_0.laz").rename( + repo_path / "auckland" / "new.laz" + ) + + r = cli_runner.invoke(["diff"]) + assert ( + r.stderr.splitlines()[0] + == "Warning: changes to linked datasets cannot be committed." + ) + + r = cli_runner.invoke(["commit", "-m", "yolo"]) + assert r.exit_code == INVALID_OPERATION + assert ( + r.stderr.splitlines()[-1] + == "Error: Aborting commit due to changes to linked datasets." + )