diff --git a/kart.spec b/kart.spec
index 37d8deba..42a86f3f 100644
--- a/kart.spec
+++ b/kart.spec
@@ -156,7 +156,7 @@ pyi_analysis = Analysis(
         # TODO: improve this somehow
         *collect_submodules('kart'),
         *collect_submodules('kart.annotations'),
-        *collect_submodules('kart.byod'),
+        *collect_submodules('kart.linked_storage'),
         *collect_submodules('kart.lfs_commands'),
         *collect_submodules('kart.point_cloud'),
         *collect_submodules('kart.sqlalchemy'),
diff --git a/kart/cli.py b/kart/cli.py
index 54ddd590..8d6b531a 100755
--- a/kart/cli.py
+++ b/kart/cli.py
@@ -54,8 +54,8 @@
     "point_cloud.import_": {"point-cloud-import"},
     "install": {"install"},
     "add_dataset": {"add-dataset"},
-    "byod.point_cloud_import": {"byod-point-cloud-import"},
-    "byod.raster_import": {"byod-raster-import"},
+    "linked_storage.point_cloud_import": {"linked-point-cloud-import"},
+    "linked_storage.raster_import": {"linked-raster-import"},
 }
 
 # These commands aren't valid Python symbols, even when we change dash to underscore.
diff --git a/kart/cli_util.py b/kart/cli_util.py
index 34795dbc..7b2e5b77 100644
--- a/kart/cli_util.py
+++ b/kart/cli_util.py
@@ -170,7 +170,6 @@ def __init__(self, *args, **kwargs):
     def handle_parse_result(self, ctx, opts, args):
         current_opt: bool = self.name in opts
         for other_name in self.exclusive_with:
-
             if other_name in opts:
                 if current_opt:
                     other = [x for x in ctx.command.params if x.name == other_name][0]
@@ -412,3 +411,16 @@ def find_param(ctx_or_params, name):
         if param.name == name:
             return param
     raise RuntimeError(f"Couldn't find param: {name}")
+
+
+def forward_context_to_command(ctx, command):
+    """
+    Given the current context and a new command (not the current command), run the new command,
+    reusing the same overall context, including the command-line arguments.
+    The new command should accept arguments in much the same form as the current command.
+    If any arguments cannot be parsed by the new command, it will fail with a UsageError as usual.
+    This can be acceptable so long as it is clear to the user what is unsupported and why.
+    """
+    subctx = command.make_context(command.name, ctx.unparsed_args)
+    subctx.obj = ctx.obj
+    subctx.forward(command)
diff --git a/kart/import_.py b/kart/import_.py
index 3eed33f9..d2859987 100644
--- a/kart/import_.py
+++ b/kart/import_.py
@@ -5,6 +5,7 @@
     call_and_exit_flag,
     KartCommand,
     find_param,
+    forward_context_to_command,
 )
 from kart.completion_shared import file_path_completer
 from kart.import_sources import from_spec, suggest_specs
@@ -60,6 +61,16 @@ def list_import_formats(ctx):
 @click.option(
     "--dataset-path", "--dataset", "ds_path", help="The dataset's path once imported"
 )
+@click.option(
+    "--link",
+    "do_link",
+    is_flag=True,
+    help=(
+        "Link the created dataset to the original source location, so that the original source location is treated as "
+        "the authoritative source for the given data and data is fetched from there if needed. Only supported for "
+        "tile-based datasets."
+    ),
+)
 @click.argument(
     "args",
     nargs=-1,
 )
@@ -134,8 +145,13 @@ def import_(ctx, args, **kwargs):
             f"Try one of the following:\n{suggest_specs()}"
         )
 
-    import_cmd = import_source_type.import_cmd
+    if kwargs.get("do_link"):
+        import_cmd = import_source_type.linked_import_cmd
+        if import_cmd is None:
+            raise click.UsageError(
+                "--link is not supported for vector or tabular imports"
+            )
+    else:
+        import_cmd = import_source_type.import_cmd
 
-    subctx = import_cmd.make_context(import_cmd.name, ctx.unparsed_args)
-    subctx.obj = ctx.obj
-    subctx.forward(import_cmd)
+    forward_context_to_command(ctx, import_cmd)
diff --git a/kart/import_sources.py b/kart/import_sources.py
index 2afd6d62..2ce4ec7b 100644
--- a/kart/import_sources.py
+++ b/kart/import_sources.py
@@ -33,6 +33,17 @@ def import_cmd(self):
 
         return raster_import
 
+    @property
+    def linked_import_cmd(self):
+        if self is self.POINT_CLOUD:
+            from kart.linked_storage.point_cloud_import import linked_point_cloud_import
+
+            return linked_point_cloud_import
+        elif self is self.RASTER:
+            from kart.linked_storage.raster_import import linked_raster_import
+
+            return linked_raster_import
+
     @property
     def import_source_class(self):
         if self is self.SQLALCHEMY_TABLE:
@@ -80,6 +91,10 @@ def __init__(
     def import_cmd(self):
         return self.import_type.import_cmd
 
+    @property
+    def linked_import_cmd(self):
+        return self.import_type.linked_import_cmd
+
     @property
     def import_source_class(self):
         return self.import_type.import_source_class
diff --git a/kart/lfs_commands/__init__.py b/kart/lfs_commands/__init__.py
index 6e4b191c..b1167790 100644
--- a/kart/lfs_commands/__init__.py
+++ b/kart/lfs_commands/__init__.py
@@ -376,7 +376,7 @@ def _do_fetch_from_urls(repo, urls_and_lfs_oids, quiet=False):
     )
     if non_s3_url:
         raise NotImplementedError(
-            f"Invalid URL - only S3 URLs are currently supported for BYOD repos: {non_s3_url}"
+            f"Invalid URL - only S3 URLs are currently supported for linked-storage datasets: {non_s3_url}"
         )
 
     urls_and_paths_and_oids = [
diff --git a/kart/byod/__init__.py b/kart/linked_storage/__init__.py
similarity index 100%
rename from kart/byod/__init__.py
rename to kart/linked_storage/__init__.py
diff --git a/kart/byod/importer.py b/kart/linked_storage/importer.py
similarity index 99%
rename from kart/byod/importer.py
rename to kart/linked_storage/importer.py
index 5ab80772..b3e84bab 100644
--- a/kart/byod/importer.py
+++ b/kart/linked_storage/importer.py
@@ -11,7 +11,7 @@
 L = logging.getLogger(__name__)
 
 
-class ByodTileImporter:
+class LinkedTileImporter:
     """
     Subclassable logic for importing the metadata from tile-based datasets,
     while leaving the data in-place on existing hosted storage.
diff --git a/kart/byod/point_cloud_import.py b/kart/linked_storage/point_cloud_import.py
similarity index 65%
rename from kart/byod/point_cloud_import.py
rename to kart/linked_storage/point_cloud_import.py
index d03ba89c..4c254c06 100644
--- a/kart/byod/point_cloud_import.py
+++ b/kart/linked_storage/point_cloud_import.py
@@ -2,18 +2,27 @@
 
 import click
 
-from kart.byod.importer import ByodTileImporter
+from kart.linked_storage.importer import LinkedTileImporter
 from kart.cli_util import StringFromFile, MutexOption, KartCommand
 from kart.point_cloud.import_ import PointCloudImporter
 from kart.point_cloud.metadata_util import extract_pc_tile_metadata
-from kart.s3_util import get_hash_and_size_of_s3_object, fetch_from_s3
+from kart.s3_util import get_hash_and_size_of_s3_object
 
 
 L = logging.getLogger(__name__)
 
 
-@click.command("byod-point-cloud-import", hidden=True, cls=KartCommand)
+@click.command("linked-point-cloud-import", hidden=True, cls=KartCommand)
 @click.pass_context
+@click.option(
+    "--convert-to-copc/--no-convert-to-copc",
+    "--cloud-optimized/--no-cloud-optimized",
+    "--cloud-optimised/--no-cloud-optimised",
+    " /--preserve-format",
+    is_flag=True,
+    default=None,
+    help="Whether to convert all non-COPC LAS or LAZ files to COPC LAZ files, or to import all files in their native format.",
+)
 @click.option(
     "--message",
     "-m",
@@ -82,13 +91,26 @@
     hidden=True,
 )
 @click.option("--dataset-path", "--dataset", help="The dataset's path once imported")
+@click.option(
+    "--link",
+    "do_link",
+    is_flag=True,
+    default=None,
+    hidden=True,
+    help=(
+        "Link the created dataset to the original source location, so that the original source location is treated as "
+        "the authoritative source for the given data and data is fetched from there if needed. Only supported for "
+        "tile-based datasets."
+    ),
+)
 @click.argument(
     "sources",
     nargs=-1,
     metavar="SOURCE [SOURCES...]",
 )
-def byod_point_cloud_import(
+def linked_point_cloud_import(
     ctx,
+    convert_to_copc,
     message,
     do_checkout,
     replace_existing,
@@ -98,16 +120,27 @@
     allow_empty,
     num_workers,
     dataset_path,
+    do_link,
     sources,
 ):
     """
-    Experimental. Import a dataset of point-cloud tiles from S3. Doesn't fetch the tiles, does store the tiles original location.
+    Import a dataset of point-cloud tiles from S3. Doesn't fetch the tiles, but does store the tiles' original location.
 
     SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
     """
+    if do_link is False:
+        # This is here for technical reasons - all the options are forwarded from one command to another, including --link.
+        # In practice we don't expect the user to set --link at all if they are also manually calling this (hidden) command.
+        raise click.UsageError("Can't do a linked import with --link=false")
+    if convert_to_copc:
+        raise click.UsageError(
+            "Sorry, converting a linked dataset to COPC is not supported - "
+            "the data must remain in its original location and its original format as the authoritative source."
+        )
+
     repo = ctx.obj.repo
 
-    ByodPointCloudImporter(
+    LinkedPointCloudImporter(
         repo=repo,
         ctx=ctx,
         convert_to_cloud_optimized=False,
@@ -124,7 +157,7 @@
     ).import_tiles()
 
 
-class ByodPointCloudImporter(ByodTileImporter, PointCloudImporter):
+class LinkedPointCloudImporter(LinkedTileImporter, PointCloudImporter):
     def extract_tile_metadata(self, tile_location):
         oid_and_size = get_hash_and_size_of_s3_object(tile_location)
         return None, extract_pc_tile_metadata(tile_location, oid_and_size=oid_and_size)
diff --git a/kart/byod/raster_import.py b/kart/linked_storage/raster_import.py
similarity index 65%
rename from kart/byod/raster_import.py
rename to kart/linked_storage/raster_import.py
index 2ab2dfd3..50886b7b 100644
--- a/kart/byod/raster_import.py
+++ b/kart/linked_storage/raster_import.py
@@ -2,18 +2,27 @@
 
 import click
 
-from kart.byod.importer import ByodTileImporter
+from kart.linked_storage.importer import LinkedTileImporter
 from kart.cli_util import StringFromFile, MutexOption, KartCommand
 from kart.raster.import_ import RasterImporter
 from kart.raster.metadata_util import extract_raster_tile_metadata
-from kart.s3_util import get_hash_and_size_of_s3_object, fetch_from_s3
+from kart.s3_util import get_hash_and_size_of_s3_object
 
 
 L = logging.getLogger(__name__)
 
 
-@click.command("byod-raster-import", hidden=True, cls=KartCommand)
+@click.command("linked-raster-import", hidden=True, cls=KartCommand)
 @click.pass_context
+@click.option(
+    "--convert-to-cog/--no-convert-to-cog",
+    "--cloud-optimized/--no-cloud-optimized",
+    "--cloud-optimised/--no-cloud-optimised",
+    " /--preserve-format",
+    is_flag=True,
+    default=None,
+    help="Whether to convert all GeoTIFFs to COGs (Cloud Optimized GeoTIFFs), or to import all files in their native format.",
+)
 @click.option(
     "--message",
     "-m",
@@ -82,13 +91,26 @@
     hidden=True,
 )
 @click.option("--dataset-path", "--dataset", help="The dataset's path once imported")
+@click.option(
+    "--link",
+    "do_link",
+    is_flag=True,
+    default=None,
+    hidden=True,
+    help=(
+        "Link the created dataset to the original source location, so that the original source location is treated as "
+        "the authoritative source for the given data and data is fetched from there if needed. Only supported for "
+        "tile-based datasets."
+    ),
+)
 @click.argument(
     "sources",
     nargs=-1,
     metavar="SOURCE [SOURCES...]",
 )
-def byod_raster_import(
+def linked_raster_import(
     ctx,
+    convert_to_cog,
     message,
     do_checkout,
     replace_existing,
@@ -98,16 +120,27 @@
     allow_empty,
     num_workers,
     dataset_path,
+    do_link,
     sources,
 ):
     """
-    Experimental. Import a dataset of raster tiles from S3. Doesn't fetch the tiles, does store the tiles original location.
+    Import a dataset of raster tiles from S3. Doesn't fetch the tiles, but does store the tiles' original location.
 
     SOURCES should be one or more GeoTIFF files (or wildcards that match multiple GeoTIFF files).
     """
+    if do_link is False:
+        # This is here for technical reasons - all the options are forwarded from one command to another, including --link.
+        # In practice we don't expect the user to set --link at all if they are also manually calling this (hidden) command.
+        raise click.UsageError("Can't do a linked import with --link=false")
+    if convert_to_cog:
+        raise click.UsageError(
+            "Sorry, converting a linked dataset to COG is not supported - "
+            "the data must remain in its original location and its original format as the authoritative source."
+        )
+
     repo = ctx.obj.repo
 
-    ByodRasterImporter(
+    LinkedRasterImporter(
         repo=repo,
         ctx=ctx,
         convert_to_cloud_optimized=False,
@@ -124,7 +157,7 @@
     ).import_tiles()
 
 
-class ByodRasterImporter(ByodTileImporter, RasterImporter):
+class LinkedRasterImporter(LinkedTileImporter, RasterImporter):
     def extract_tile_metadata(self, tile_location):
         oid_and_size = get_hash_and_size_of_s3_object(tile_location)
         return None, extract_raster_tile_metadata(
diff --git a/kart/point_cloud/import_.py b/kart/point_cloud/import_.py
index 82f4fc2b..ad91f1e8 100644
--- a/kart/point_cloud/import_.py
+++ b/kart/point_cloud/import_.py
@@ -2,7 +2,12 @@
 
 import click
 
-from kart.cli_util import StringFromFile, MutexOption, KartCommand
+from kart.cli_util import (
+    StringFromFile,
+    MutexOption,
+    KartCommand,
+    forward_context_to_command,
+)
 from kart.completion_shared import file_path_completer
 from kart.exceptions import InvalidOperation, INVALID_FILE_FORMAT
 from kart.parse_args import parse_import_sources_and_datasets
@@ -98,6 +103,15 @@
     hidden=True,
 )
 @click.option("--dataset-path", "--dataset", help="The dataset's path once imported")
+@click.option(
+    "--link",
+    "do_link",
+    is_flag=True,
+    help=(
+        "Link the created dataset to the original source location, so that the original source location is treated as "
+        "the authoritative source for the given data and data is fetched from there if needed."
+    ),
+)
 @click.argument(
     "args",
     nargs=-1,
 )
@@ -116,6 +130,7 @@ def point_cloud_import(
     allow_empty,
     num_workers,
     dataset_path,
+    do_link,
     args,
 ):
     """
@@ -123,6 +138,12 @@
 
     SOURCES should be one or more LAZ or LAS files (or wildcards that match multiple LAZ or LAS files).
     """
+    if do_link:
+        from kart.linked_storage.point_cloud_import import linked_point_cloud_import
+
+        forward_context_to_command(ctx, linked_point_cloud_import)
+        return
+
     repo = ctx.obj.repo
 
     sources, datasets = parse_import_sources_and_datasets(args)
diff --git a/kart/raster/import_.py b/kart/raster/import_.py
index 7cf402a0..e26b167c 100644
--- a/kart/raster/import_.py
+++ b/kart/raster/import_.py
@@ -2,7 +2,12 @@
 
 import click
 
-from kart.cli_util import StringFromFile, MutexOption, KartCommand
+from kart.cli_util import (
+    StringFromFile,
+    MutexOption,
+    KartCommand,
+    forward_context_to_command,
+)
 from kart.completion_shared import file_path_completer
 from kart.exceptions import InvalidOperation, INVALID_FILE_FORMAT
 from kart.parse_args import parse_import_sources_and_datasets
@@ -98,6 +103,15 @@
     hidden=True,
 )
 @click.option("--dataset-path", "--dataset", help="The dataset's path once imported")
+@click.option(
+    "--link",
+    "do_link",
+    is_flag=True,
+    help=(
+        "Link the created dataset to the original source location, so that the original source location is treated as "
+        "the authoritative source for the given data and data is fetched from there if needed."
+    ),
+)
 @click.argument(
     "args",
     nargs=-1,
 )
@@ -116,6 +130,7 @@ def raster_import(
     allow_empty,
     num_workers,
     dataset_path,
+    do_link,
     args,
 ):
     """
@@ -123,6 +138,12 @@
 
     SOURCES should be one or more GeoTIFF files (or wildcards that match multiple GeoTIFF files).
""" + if do_link: + from kart.linked_storage.raster_import import linked_raster_import + + forward_context_to_command(ctx, linked_raster_import) + return + repo = ctx.obj.repo sources, datasets = parse_import_sources_and_datasets(args) diff --git a/kart/tile/importer.py b/kart/tile/importer.py index 130de175..334a50cc 100644 --- a/kart/tile/importer.py +++ b/kart/tile/importer.py @@ -402,8 +402,9 @@ def preprocess_sources(self, sources): m = self.URI_PATTERN.match(source) scheme = m.group(1) if m else None if scheme not in self.ALLOWED_SCHEMES: + suffix = f", not a {scheme} URI" if scheme else "" raise click.UsageError( - f"SOURCE {source} should be {self.ALLOWED_SCHEMES_DESC}, not a {m.group(1)} URI" + f"SOURCE {source} should be {self.ALLOWED_SCHEMES_DESC}{suffix}" ) if scheme is None and not (Path() / source).is_file(): raise NotFound(f"No data found at {source}", exit_code=NO_IMPORT_SOURCE) diff --git a/tests/byod/test_imports.py b/tests/linked_storage/test_imports.py similarity index 96% rename from tests/byod/test_imports.py rename to tests/linked_storage/test_imports.py index 38b383a1..5f5e1e02 100644 --- a/tests/byod/test_imports.py +++ b/tests/linked_storage/test_imports.py @@ -11,7 +11,9 @@ @pytest.mark.slow -def test_byod_point_cloud_import( +@pytest.mark.parametrize("command", ["linked-point-cloud-import", "import --link"]) +def test_linked_point_cloud_import( + command, tmp_path, chdir, cli_runner, @@ -26,7 +28,7 @@ def test_byod_point_cloud_import( with chdir(repo_path): r = cli_runner.invoke( [ - "byod-point-cloud-import", + *command.split(" "), s3_test_data_point_cloud, "--dataset-path=auckland", "--no-checkout", @@ -146,7 +148,9 @@ def test_byod_point_cloud_import( @pytest.mark.slow -def test_byod_raster_import( +@pytest.mark.parametrize("command", ["linked-raster-import", "import --link"]) +def test_linked_raster_import( + command, tmp_path, chdir, cli_runner, @@ -161,7 +165,7 @@ def test_byod_raster_import( with chdir(repo_path): r = cli_runner.invoke( [ - "byod-raster-import", + *command.split(" "), s3_test_data_raster, "--dataset-path=erorisk_si", "--no-checkout",