From edf7be3838f67cd19b95f23b67d8090e49cfbfd0 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 11 Oct 2023 22:06:16 +0100 Subject: [PATCH 1/4] Add clientpath command --- src/omero_mkngff/__init__.py | 97 ++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index a8385f7..89e3103 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -25,7 +25,9 @@ import omero.all # noqa from omero.cli import BaseControl, Parser +from omero.gateway import BlitzGateway from omero.sys import ParametersI +from omero.rtypes import rstring SUFFIX = "mkngff" HELP = """Plugin to swap OMERO filesets with NGFF @@ -169,6 +171,80 @@ def _configure(self, parser: Parser) -> None: symlink.add_argument("symlink_target") symlink.set_defaults(func=self.symlink) + # fix 'clientpath' from 'unknown' + # templatePrefix: demo_2/Blitz-0-Ice.ThreadPool.Server-18/2018-11/26/10-44-37.527_mkngff/ + # originalFile path: demo_2/Blitz-0-Ice.ThreadPool.Server-24/2018-11/26/10-39-10.551_mkngff/c49efcfd-e767-4ae5-adbf-299cafd92120.zarr/.zattrs + # clientpath: https://uk1s3.embassy.ebi.ac.uk/bia-integrator-data/S-BIAD815/c49efcfd-e767-4ae5-adbf-299cafd92120/c49efcfd-e767-4ae5-adbf-299cafd92120.zarr/ + clientpath = sub.add_parser("clientpath", help="Fix clientpath") + clientpath.add_argument("target", help="E.g. Fileset:1, Project:2, Screen:3 etc") + clientpath.add_argument("url") + clientpath.set_defaults(func=self.clientpath) + + def clientpath(self, args: Namespace) -> None: + client = self.ctx.conn(args) # noqa + conn = BlitzGateway(client_obj=client) + for fileset in self.get_filesets(conn, args.target): + print("Fileset", fileset.id.val) + self.fix_clientpath(conn, fileset, args.url) + + def fix_clientpath(self, conn, fileset, url): + prefix = fileset.templatePrefix.val + update = conn.getUpdateService() + + tosave = [] + for fse in fileset.copyUsedFiles(): + orig = fse.originalFile + pathname = os.path.join(orig.path.val, orig.name.val) + if "zarr" not in pathname: + continue + zarrpath = pathname.replace(prefix, "") + zarrname = zarrpath.split(".zarr")[0] + cpath = os.path.join(url, zarrname, zarrpath) + fse.clientPath = rstring(cpath) + tosave.append(fse) + + print("tosave", len(tosave)) + if len(tosave) > 0: + update.saveArray(tosave) + + def get_object(self, conn, obj_string): + for dtype in ["Screen", "Plate", "Project", "Dataset", "Image"]: + if obj_string.startswith(dtype): + obj_id = int(obj_string.replace(dtype + ":", "")) + obj = conn.getObject(dtype, obj_id) + if obj is None: + print(obj_string, "not found!") + return obj + + def get_filesets(self, conn, obj_string): + print("get_filesets", obj_string) + """obj_string is Image:123 or Fileset:123 or Plate:123""" + if obj_string.startswith("Fileset:"): + yield self.get_fileset(conn.c, int(obj_string.split(":")[1])) + + obj = self.get_object(conn, obj_string) + if obj_string.startswith("Image:"): + yield self.get_fileset(conn.c, obj.fileset.id.val) + if obj_string.startswith("Plate:"): + yield self.get_fileset_from_plate(conn, obj) + + if obj_string.startswith("Screen:"): + for plate in obj.listChildren(): + yield self.get_fileset_from_plate(conn, plate) + if obj_string.startswith("Dataset:"): + for image in obj.listChildren(): + yield self.get_fileset(conn.c, image.fileset.id.val) + if obj_string.startswith("Project:"): + for dataset in obj.listChildren(): + for image in dataset.listChildren(): + yield self.get_fileset(conn.c, image.fileset.id.val) + + def get_fileset_from_plate(self, conn, plate): + for well in plate.listChildren(): + for ws in well.listChildren(): + image = ws.getImage() + return self.get_fileset(conn.c, image.fileset.id.val) + def setup(self, args: Namespace) -> None: self.ctx.out(SETUP) @@ -234,26 +310,29 @@ def symlink(self, args: Namespace) -> None: self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target) def get_prefix(self, args): + client = self.ctx.conn(args) # noqa + fs = self.get_fileset(client, args.fileset_id) + prefix = fs.templatePrefix.val + + if prefix.endswith("/"): + prefix = prefix[:-1] # Drop ending "/" + + return prefix - conn = self.ctx.conn(args) # noqa - q = conn.sf.getQueryService() + def get_fileset(self, client, fileset_id): + q = client.sf.getQueryService() rv = q.findAllByQuery( ( "select f from Fileset f join fetch f.usedFiles fe " "join fetch fe.originalFile ofile where f.id = :id" ), - ParametersI().addId(args.fileset_id), + ParametersI().addId(fileset_id), ) if len(rv) != 1: self.ctx.die(400, f"Found wrong number of filesets: {len(rv)}") return - prefix = rv[0].templatePrefix.val - - if prefix.endswith("/"): - prefix = prefix[:-1] # Drop ending "/" - - return prefix + return rv[0] def get_symlink_dir(self, symlink_repo, prefix, symlink_path): prefix_dir = os.path.join(symlink_repo, prefix) From 10abb275bae6e5344cce232e5e322fb610725d36 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 16 Oct 2023 14:31:16 +0100 Subject: [PATCH 2/4] Revert "Add clientpath command" This reverts commit edf7be3838f67cd19b95f23b67d8090e49cfbfd0. --- src/omero_mkngff/__init__.py | 97 ++++-------------------------------- 1 file changed, 9 insertions(+), 88 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index 89e3103..a8385f7 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -25,9 +25,7 @@ import omero.all # noqa from omero.cli import BaseControl, Parser -from omero.gateway import BlitzGateway from omero.sys import ParametersI -from omero.rtypes import rstring SUFFIX = "mkngff" HELP = """Plugin to swap OMERO filesets with NGFF @@ -171,80 +169,6 @@ def _configure(self, parser: Parser) -> None: symlink.add_argument("symlink_target") symlink.set_defaults(func=self.symlink) - # fix 'clientpath' from 'unknown' - # templatePrefix: demo_2/Blitz-0-Ice.ThreadPool.Server-18/2018-11/26/10-44-37.527_mkngff/ - # originalFile path: demo_2/Blitz-0-Ice.ThreadPool.Server-24/2018-11/26/10-39-10.551_mkngff/c49efcfd-e767-4ae5-adbf-299cafd92120.zarr/.zattrs - # clientpath: https://uk1s3.embassy.ebi.ac.uk/bia-integrator-data/S-BIAD815/c49efcfd-e767-4ae5-adbf-299cafd92120/c49efcfd-e767-4ae5-adbf-299cafd92120.zarr/ - clientpath = sub.add_parser("clientpath", help="Fix clientpath") - clientpath.add_argument("target", help="E.g. Fileset:1, Project:2, Screen:3 etc") - clientpath.add_argument("url") - clientpath.set_defaults(func=self.clientpath) - - def clientpath(self, args: Namespace) -> None: - client = self.ctx.conn(args) # noqa - conn = BlitzGateway(client_obj=client) - for fileset in self.get_filesets(conn, args.target): - print("Fileset", fileset.id.val) - self.fix_clientpath(conn, fileset, args.url) - - def fix_clientpath(self, conn, fileset, url): - prefix = fileset.templatePrefix.val - update = conn.getUpdateService() - - tosave = [] - for fse in fileset.copyUsedFiles(): - orig = fse.originalFile - pathname = os.path.join(orig.path.val, orig.name.val) - if "zarr" not in pathname: - continue - zarrpath = pathname.replace(prefix, "") - zarrname = zarrpath.split(".zarr")[0] - cpath = os.path.join(url, zarrname, zarrpath) - fse.clientPath = rstring(cpath) - tosave.append(fse) - - print("tosave", len(tosave)) - if len(tosave) > 0: - update.saveArray(tosave) - - def get_object(self, conn, obj_string): - for dtype in ["Screen", "Plate", "Project", "Dataset", "Image"]: - if obj_string.startswith(dtype): - obj_id = int(obj_string.replace(dtype + ":", "")) - obj = conn.getObject(dtype, obj_id) - if obj is None: - print(obj_string, "not found!") - return obj - - def get_filesets(self, conn, obj_string): - print("get_filesets", obj_string) - """obj_string is Image:123 or Fileset:123 or Plate:123""" - if obj_string.startswith("Fileset:"): - yield self.get_fileset(conn.c, int(obj_string.split(":")[1])) - - obj = self.get_object(conn, obj_string) - if obj_string.startswith("Image:"): - yield self.get_fileset(conn.c, obj.fileset.id.val) - if obj_string.startswith("Plate:"): - yield self.get_fileset_from_plate(conn, obj) - - if obj_string.startswith("Screen:"): - for plate in obj.listChildren(): - yield self.get_fileset_from_plate(conn, plate) - if obj_string.startswith("Dataset:"): - for image in obj.listChildren(): - yield self.get_fileset(conn.c, image.fileset.id.val) - if obj_string.startswith("Project:"): - for dataset in obj.listChildren(): - for image in dataset.listChildren(): - yield self.get_fileset(conn.c, image.fileset.id.val) - - def get_fileset_from_plate(self, conn, plate): - for well in plate.listChildren(): - for ws in well.listChildren(): - image = ws.getImage() - return self.get_fileset(conn.c, image.fileset.id.val) - def setup(self, args: Namespace) -> None: self.ctx.out(SETUP) @@ -310,29 +234,26 @@ def symlink(self, args: Namespace) -> None: self.create_symlink(args.symlink_repo, prefix, symlink_path, args.symlink_target) def get_prefix(self, args): - client = self.ctx.conn(args) # noqa - fs = self.get_fileset(client, args.fileset_id) - prefix = fs.templatePrefix.val - - if prefix.endswith("/"): - prefix = prefix[:-1] # Drop ending "/" - - return prefix - def get_fileset(self, client, fileset_id): - q = client.sf.getQueryService() + conn = self.ctx.conn(args) # noqa + q = conn.sf.getQueryService() rv = q.findAllByQuery( ( "select f from Fileset f join fetch f.usedFiles fe " "join fetch fe.originalFile ofile where f.id = :id" ), - ParametersI().addId(fileset_id), + ParametersI().addId(args.fileset_id), ) if len(rv) != 1: self.ctx.die(400, f"Found wrong number of filesets: {len(rv)}") return - return rv[0] + prefix = rv[0].templatePrefix.val + + if prefix.endswith("/"): + prefix = prefix[:-1] # Drop ending "/" + + return prefix def get_symlink_dir(self, symlink_repo, prefix, symlink_path): prefix_dir = os.path.join(symlink_repo, prefix) From 0d2615ac84718512162e7ef268da1f0a5831752c Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 16 Oct 2023 15:54:06 +0100 Subject: [PATCH 3/4] Add clientpath support to sql and setup.sql --- src/omero_mkngff/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index a8385f7..1665d4f 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -104,7 +104,7 @@ insert into filesetentry (id, {DETAILS1}, fileset, originalfile, fileset_index, clientpath) values (nextval('seq_filesetentry'), {DETAILS2}, - new_fileset, new_file, i-1, 'unknown'); + new_fileset, new_file, i-1, info[i][4]); end loop; @@ -134,7 +134,7 @@ commit; """ -ROW = """ ['{PATH}', '{NAME}', '{MIME}']""" +ROW = """ ['{PATH}', '{NAME}', '{MIME}', '{CLIENTPATH}']""" class MkngffControl(BaseControl): @@ -155,6 +155,10 @@ def _configure(self, parser: Parser) -> None: help=("Create symlinks from Fileset to symlink_target using" "this ManagedRepo path, e.g. /data/OMERO/ManagedRepository") ) + sql.add_argument( + "--clientpath", + help=("Base path to create clientpath/path/to/img.zarr/") + ) sql.add_argument("fileset_id", type=int) sql.add_argument("symlink_target") sql.set_defaults(func=self.sql) @@ -193,6 +197,12 @@ def sql(self, args: Namespace) -> None: # Need a file to set path/name on pixels table BioFormats uses for setId() setid_target = None for row_path, row_name, row_mime in self.walk(symlink_path): + row_clientpath = "unknown" + if args.clientpath: + # zarr_path is relative URL from .zarr /to/file/ + zarr_path = str(row_path).replace(args.symlink_target, '') + row_clientpath = f"{args.clientpath}{zarr_path}/{row_name}" + # remove common path to shorten row_path = str(row_path).replace(f"{symlink_path.parent}", "") if str(row_path).startswith("/"): @@ -206,6 +216,7 @@ def sql(self, args: Namespace) -> None: PATH=f"{row_full_path}/", NAME=row_name, MIME=row_mime, + CLIENTPATH=row_clientpath, ) ) From 2469f6df8c3fff34a4a810b460f9c341531d0516 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 16 Oct 2023 16:10:18 +0100 Subject: [PATCH 4/4] Ignore Directories in walk() of fileset --- src/omero_mkngff/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/omero_mkngff/__init__.py b/src/omero_mkngff/__init__.py index 1665d4f..fc5a5b1 100644 --- a/src/omero_mkngff/__init__.py +++ b/src/omero_mkngff/__init__.py @@ -298,7 +298,6 @@ def walk(self, path: Path) -> Generator[Tuple[Path, str, str], None, None]: else: is_array = (p / ".zarray").exists() if is_array or (p / ".zgroup").exists(): - yield (p.parent, p.name, "Directory") # If array, don't recursively check sub-dirs if is_array: yield (p, ".zarray", "application/octet-stream")