-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmisc_functions_rplab.py
605 lines (494 loc) · 20.1 KB
/
misc_functions_rplab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
"""
Notes on this module:
1. I would have made many of these functions methods of
a subclass of pathlib.Path, but subclassing pathlib.Path is really annoying because
of its "_flavor" attribute, used to differentiate between Posix and Windows
file systems. Therefore, we have this module with functions that would gladly
belong in a class. Sigh.
2. All file system functions in this module should support both str parameters
and pathlib.Path parameters. Generally, this is as easy as converting pathlib.Path
objects to strings using str(), or vice versa with pathlib.Path().
"""
import contextlib
import json
import os
import pathlib
import shutil
from ast import literal_eval
from configparser import ConfigParser
from enum import Enum
import numpy as np
import psutil
import skimage.io
from natsort import natsorted
from tifffile import TiffFile
# Pattern string for a single decimal digit (regex character class).
# NOTE(review): not referenced anywhere in the visible part of this module.
RE_NUM = "[0-9]"
class ImageFileType(Enum):
    """
    Enum class with constants representing image file type, which is
    determined by file extension.

    Each member's value is the list of file extensions (leading dot
    included) that belong to that image type.

    Current assumptions are that all image file types are supported by
    skimage's io module, so that imread and imwrite work. If this ends up
    not being the case, a separate implementation will need to be applied
    for those image file extensions that aren't supported.
    """
    # Members are deliberately not annotated: the values are lists, not str,
    # and annotated names in an Enum body confuse static type checkers.
    TIF = [".tif", ".tiff"]
    PNG = [".png"]
class OtherFileType(Enum):
    """
    Enum class with constants representing non-image file types, which are
    determined by file extension.

    Each member's value is the file extension, leading dot included.
    """
    # Left unannotated so type checkers treat this as an Enum member.
    TXT = ".txt"
class FileSubtype(Enum):
    """
    Enum class with constants representing file subtype, which is determined
    by substrings in filename, such as "MMStack" for Micro-Manager tif stack
    images and "metadata" for Micro-Manager metadata files.

    Each member's value is the substring that is searched for in the
    filename.
    """
    # Left unannotated so type checkers treat these as Enum members.
    MMSTACK = "MMStack"
    METADATA = "metadata"
    LS_NOTES = "notes"
    OME = ".ome"
#functions to determine file type
def get_file_type(file_path: pathlib.Path) -> ImageFileType:
    """
    Determines file type from the extension of file_path.

    ### Parameters:

    file_path: str or pathlib.Path

    ### Returns:

    file_type: OtherFileType.TXT for ".txt" files, ImageFileType.TIF or
        ImageFileType.PNG for the extensions listed in those members, and
        None for any other extension. The comparison is case-sensitive.
    """
    suffix = pathlib.Path(file_path).suffix
    if suffix == OtherFileType.TXT.value:
        return OtherFileType.TXT
    # Image types are checked in the same order as before: TIF, then PNG.
    for image_type in (ImageFileType.TIF, ImageFileType.PNG):
        if suffix in image_type.value:
            return image_type
    return None
def get_file_subtype(file_path: pathlib.Path) -> FileSubtype:
    """
    Gets subtype of file. Subtype is the location/context from where that
    file was generated.

    For example, a file with subtype MMSTACK is a tif stack file generated
    by Micro-Manager.

    ### Parameters:

    file_path: str or pathlib.Path

    ### Returns:

    file_subtype: FileSubtype
        FileSubtype.MMSTACK for tif files whose name contains "MMStack",
        FileSubtype.METADATA or FileSubtype.LS_NOTES for txt files whose
        name contains "metadata" or "notes" (checked in that order), and
        None when no subtype applies.
    """
    # Callers pass str as well as Path; .name below requires a Path.
    file_path = pathlib.Path(file_path)
    file_type = get_file_type(file_path)
    filename = file_path.name
    if file_type == ImageFileType.TIF and FileSubtype.MMSTACK.value in filename:
        return FileSubtype.MMSTACK
    if file_type == OtherFileType.TXT:
        if FileSubtype.METADATA.value in filename:
            return FileSubtype.METADATA
        if FileSubtype.LS_NOTES.value in filename:
            return FileSubtype.LS_NOTES
    return None
def is_tif(file_path: pathlib.Path):
    """Returns True if get_file_type() classifies file_path as a TIF."""
    detected_type = get_file_type(file_path)
    return detected_type == ImageFileType.TIF
def is_png(file_path: pathlib.Path):
    """Returns True if get_file_type() classifies file_path as a PNG."""
    detected_type = get_file_type(file_path)
    return detected_type == ImageFileType.PNG
def is_mm_metadata(file_path: pathlib.Path):
    """
    Returns True if get_file_subtype() classifies file_path as a
    Micro-Manager metadata file.
    """
    detected_subtype = get_file_subtype(file_path)
    return detected_subtype == FileSubtype.METADATA
def is_ls_pycro_notes(file_path: pathlib.Path):
    """
    Returns True if get_file_subtype() classifies file_path as an LS Pycro
    notes file.
    """
    detected_subtype = get_file_subtype(file_path)
    return detected_subtype == FileSubtype.LS_NOTES
def get_image_extns() -> list:
    """
    Returns a flat list of every file extension listed in ImageFileType,
    in member definition order.
    """
    image_extns = []
    for image_type in ImageFileType:
        image_extns.extend(image_type.value)
    return image_extns
def remove_file_extn(file_path) -> pathlib.Path:
    """
    Removes the final file extension from file_path.

    Only the last suffix is removed, so "foo.ome.tif" becomes "foo.ome".

    ### Parameters:

    file_path: str or pathlib.Path

    ### Returns:

    str if file_path is a str; otherwise a pathlib.Path.
    """
    # with_suffix() returns a new path instead of modifying in place, so the
    # result must be kept (it was previously discarded, leaving the
    # extension untouched).
    stripped_path = pathlib.Path(file_path).with_suffix("")
    if isinstance(file_path, str):
        return str(stripped_path)
    return stripped_path
def remove_mmstack(file_path: pathlib.Path) -> pathlib.Path:
    """
    Removes every "_MMStack" substring from a Micro-Manager image path.

    The replacement is applied to the whole path string, not just the
    filename. Returns a str when file_path is a str and a pathlib.Path when
    file_path is a pathlib.Path; any other input type yields None.
    """
    marker = f"_{FileSubtype.MMSTACK.value}"
    stripped = str(file_path).replace(marker, "")
    if isinstance(file_path, str):
        return stripped
    if isinstance(file_path, pathlib.Path):
        return pathlib.Path(stripped)
    return None
def remove_ome(file_path: pathlib.Path) -> pathlib.Path:
    """
    Removes every ".ome" substring from a Micro-Manager image path.

    The replacement is applied to the whole path string, not just the
    filename. Returns a str when file_path is a str and a pathlib.Path when
    file_path is a pathlib.Path; any other input type yields None.
    """
    marker = FileSubtype.OME.value
    stripped = str(file_path).replace(marker, "")
    if isinstance(file_path, str):
        return stripped
    if isinstance(file_path, pathlib.Path):
        return pathlib.Path(stripped)
    return None
def shutil_copy_ignore_images(source_dir: pathlib.Path,
                              dest_dir: pathlib.Path
                              ):
    """
    Copies the directory tree of source_dir to dest_dir, skipping every file
    whose extension is listed in ImageFileType. Folders, subfolders and all
    other files are copied. dest_dir may already exist.

    ## Parameters:

    source_dir: str or pathlib.Path
        source directory to be copied.

    dest_dir: str or pathlib.Path
        destination directory that source_dir is copied to.

    ## Returns:

    The value returned by shutil.copytree.
    """
    # The ignore callable matches "*<extension>" for each image extension.
    return shutil.copytree(
        source_dir,
        dest_dir,
        ignore=get_image_ignore_pattern(),
        dirs_exist_ok=True,
    )
def get_image_ignore_pattern():
    """
    Builds a shutil.ignore_patterns callable with one "*<extension>" glob
    per image extension, for use as the ignore argument of shutil.copytree.
    """
    globs = ["*" + extn for extn in get_image_extns()]
    return shutil.ignore_patterns(*globs)
def file_in_use(file_path: pathlib.Path) -> bool:
    """
    Checks whether the file at file_path is currently open in any process
    listed by psutil.process_iter().

    Processes whose open files cannot be inspected (access denied, process
    already gone, etc.) are skipped, so a False result means only that no
    inspectable process has the file open.

    ### Parameters:

    file_path: str or pathlib.Path

    ### Returns:

    True if an inspectable process has the file open, else False.
    """
    # psutil reports absolute paths, so make the target absolute as well and
    # normalize case/separators so the comparison is reliable on Windows.
    target = os.path.normcase(os.path.abspath(file_path))
    for process in psutil.process_iter():
        try:
            open_files = process.open_files()
        except (psutil.Error, OSError):
            # Best effort: this process can't be inspected, move on.
            continue
        if any(os.path.normcase(item.path) == target for item in open_files):
            return True
    return False
def get_dir_name(dir: pathlib.Path) -> str:
    """
    Returns the name of the directory at dir. If dir points to a file,
    returns the name of that file's parent directory instead. Returns None
    if dir is neither an existing directory nor an existing file.
    """
    path = pathlib.Path(dir)
    if path.is_dir():
        return path.name
    if path.is_file():
        return path.parent.name
    return None
def get_batch_path(source_path: pathlib.Path,
                   dest_dir: pathlib.Path,
                   suffix: str = ""
                   ) -> pathlib.Path:
    """
    Returns the save directory for batch processes: a folder inside dest_dir
    named after the source directory, with suffix appended.

    ### Example:

    >>> get_batch_path("C:/Jonah/foo", "D:/Drake", "_dr")
    >>> 'D:/Drake/foo_dr'
    """
    source_name = get_dir_name(source_path)
    return pathlib.Path(dest_dir) / f"{source_name}{suffix}"
def get_save_path(file_path: pathlib.Path,
                  source_path: pathlib.Path,
                  dest_path: pathlib.Path,
                  ) -> pathlib.Path:
    """
    Returns file save path for batch processes: the location of file_path
    relative to source_path, re-rooted under dest_path.

    ### Parameters:

    file_path: str or pathlib.Path
        path of the file being processed; must be inside source_path.

    source_path: str or pathlib.Path
        root directory of the batch source.

    dest_path: str or pathlib.Path
        root directory of the batch destination.

    ### Raises:

    ValueError
        If file_path is not located under source_path.

    ### Example:

    >>> get_save_path("C:/Jonah/fish/pos/foo.tif", "C:/Jonah", "D:/Drake")
    >>> 'D:/Drake/fish/pos/foo.tif'
    """
    # Convert up front so str arguments (as in the example) work too.
    rel_path = pathlib.Path(file_path).relative_to(source_path)
    return pathlib.Path(dest_path).joinpath(rel_path)
def is_other_image_files(file_path: pathlib.Path) -> bool:
    """
    Returns True if the parent directory of file_path holds at least one
    image file (per the extensions in ImageFileType) other than file_path
    itself. Otherwise returns False.
    """
    target = pathlib.Path(file_path)
    return any(
        file_is_image(sibling) and not target.samefile(sibling)
        for sibling in target.parent.iterdir()
    )
def get_image_files(dir: pathlib.Path):
    """
    Returns a naturally sorted list of all image files directly inside dir.

    Entries are pathlib.Path objects when dir is a pathlib.Path, and str
    otherwise.
    """
    image_paths = []
    for entry in pathlib.Path(dir).iterdir():
        with contextlib.suppress(TypeError):
            if file_is_image(entry):
                image_paths.append(str(entry))
    # Natural sort keeps numbered files in counting order: "image_2" comes
    # before "image_10", which plain string sorting would get wrong.
    image_paths = natsorted(image_paths)
    if isinstance(dir, pathlib.Path):
        return [pathlib.Path(image_path) for image_path in image_paths]
    return image_paths
def get_image_lists(source_path: pathlib.Path):
    """
    Returns a list of lists of image files, one inner list per directory
    under source_path (source_path itself included) that contains at least
    one image file. Directories without image files are left out.

    ### Parameters:

    source_path: str or pathlib.Path
        root directory to walk.
    """
    image_lists = []
    # Only the directory path is needed from each os.walk step.
    for root, _dirs, _files in os.walk(source_path):
        image_list = get_image_files(pathlib.Path(root))
        if image_list:
            image_lists.append(image_list)
    return image_lists
def get_all_subdirectories(source_path: pathlib.Path):
    """
    Returns a list of pathlib.Path objects for source_path and every
    directory beneath it, in os.walk order (source_path first).

    Returns an empty list if source_path does not exist.

    ### Parameters:

    source_path: str or pathlib.Path
        root directory to walk.
    """
    return [pathlib.Path(root) for root, _dirs, _files in os.walk(source_path)]
def copy_non_image_files(source_dir: pathlib.Path,
                         dest_dir: pathlib.Path):
    """
    Copies the non-image files directly inside source_dir to dest_dir.

    Subdirectories of source_dir are not copied. dest_dir is created if it
    does not exist yet.

    ### Parameters:

    source_dir: str or pathlib.Path
        directory whose files are copied.

    dest_dir: str or pathlib.Path
        directory the files are copied into.
    """
    source_dir = pathlib.Path(source_dir)
    dest_dir = pathlib.Path(dest_dir)
    # If dest_dir were missing, shutil.copy would treat it as a file name
    # and every copy would overwrite the previous one.
    dest_dir.mkdir(parents=True, exist_ok=True)
    for file in source_dir.iterdir():
        # shutil.copy only handles regular files, so skip subdirectories.
        if file.is_file() and not file_is_image(file):
            shutil.copy(file, dest_dir)
def read_image(file_path: pathlib.Path) -> np.ndarray:
    """
    Reads the images in a file and returns them as an ndarray.

    This is mostly here because tifffile loads in all images from all image
    files that share metadata, which hogs memory. If file_path is a tif
    file, this loads in only the images stored in that file. All other
    file types are read with skimage.io.imread.

    ### Parameters:

    file_path: str or pathlib.Path
        path of the image file to read.
    """
    if get_file_type(file_path) == ImageFileType.TIF:
        # Context manager closes the file handle once the pages are read.
        with TiffFile(file_path) as stack:
            num_pages = len(stack.pages)
            # Explicit page range restricts the read to this file's pages.
            image = stack.asarray(range(num_pages))
    else:
        image = skimage.io.imread(file_path)
    return image
def save_image(save_path: pathlib.Path,
               image: np.ndarray,
               png_compression: int = 3):
    """
    Writes image to save_path with skimage.io.imsave, with the contrast
    check disabled.

    ### Parameters:

    save_path: str or pathlib.Path
        destination file path; its extension decides the format.

    image: np.ndarray
        image data to write.

    png_compression: int
        passed to imsave as compress_level, for PNG files only.
    """
    # compress_level is only forwarded when the target is a PNG.
    extra_kwargs = {"compress_level": png_compression} if is_png(save_path) else {}
    skimage.io.imsave(save_path, image, check_contrast=False, **extra_kwargs)
def file_is_image(file_path: pathlib.Path) -> bool:
    """
    Returns True if the extension of file_path is one of the image file
    extensions in the ImageFileType class, else False.
    """
    suffix = pathlib.Path(file_path).suffix
    return suffix in get_image_extns()
def get_reduced_filename(file_path: pathlib.Path) -> str:
    """
    Returns the filename of file_path reduced to its base name: everything
    from the first ".ome" onward is dropped, then the result is passed
    through remove_mmstack() and remove_file_extn().
    """
    name = pathlib.Path(file_path).name
    # partition()[0] is the text before the first ".ome" (or the whole name).
    before_ome = name.partition(FileSubtype.OME.value)[0]
    return remove_file_extn(remove_mmstack(before_ome))
def is_mm(file_path: pathlib.Path):
    """
    Returns True if an MMMetadata object can be built for file_path, and
    False if building it raises FileNotFoundError.
    """
    try:
        MMMetadata(file_path)
    except FileNotFoundError:
        return False
    else:
        return True
def from_same_mm_series(file_list) -> bool:
    """
    Returns True if every file in file_list belongs to the same
    Micro-Manager acquisition series as the first file.

    The series is defined by the filenames listed in the Micro-Manager
    metadata found for the first file. Returns False if file_list is empty
    or if no metadata is found for the first file.

    ### Parameters:

    file_list: sequence of str or pathlib.Path
    """
    if not file_list:
        return False
    try:
        # Parse the metadata once; it serves both as the "is this MM?" check
        # and as the source of the series' filenames.
        series_filenames = set(MMMetadata(file_list[0]).all_filenames)
    except FileNotFoundError:
        return False
    return all(
        pathlib.Path(file).name in series_filenames for file in file_list[1:]
    )
class MMMetadata(object):
    """
    Class to hold Micro-Manager metadata from Micro-Manager tif stack files.

    ## Constructor Parameters:

    file_path: str or pathlib.Path
        file path of MM tif image, or the directory that holds the image
        and its metadata file.

    ## Raises:

    FileNotFoundError
        If no Micro-Manager metadata file is found in the directory.
    """
    # Possible axes in Micro-Manager metadata
    _Z = "z"
    _CHANNEL = "channel"
    _TIME = "time"
    _POSITION = "position"

    def __init__(self, file_path: pathlib.Path):
        self.file_path = file_path
        # Full parsed JSON contents of the metadata file.
        self._metadata_dict: dict = self._get_metadata_dict()
        self.summary_metadata: dict = self._metadata_dict["Summary"]
        self.axis_order = self._get_axis_order()
        # One "FrameKey..." entry per image, in metadata file order.
        self.frame_keys: list = [k for k in self._metadata_dict if "FrameKey" in k]
        self.image_width: int = int(self.summary_metadata["Width"])
        self.image_height: int = int(self.summary_metadata["Height"])
        self.dims: dict = self._get_dimensions()
        self.num_dims: int = len(self.dims)
        self.directory = str(pathlib.Path(file_path).parent)
        self.all_filenames: list = self._get_all_filenames()
        self.is_multifile: bool = len(self.all_filenames) > 1

    def get_image_metadata(self, image_num: int):
        """Returns the MMImageMetadata of the image at index image_num."""
        return MMImageMetadata(self, image_num)

    def get_filename_start_num(self, filename: pathlib.Path):
        """
        Returns the index of the first frame key whose "FileName" entry
        equals the name of filename, or None if no frame key matches.
        """
        filename = pathlib.Path(filename).name
        for index, key in enumerate(self.frame_keys):
            if self._metadata_dict[key]["FileName"] == filename:
                return index
        return None

    def _get_metadata_dict(self) -> dict:
        """
        Finds the metadata file in the directory of self.file_path and
        returns its parsed JSON contents.

        A metadata file whose name contains the reduced image filename is
        preferred; otherwise the first metadata file found is used.
        """
        file_path = pathlib.Path(self.file_path)
        filename = get_reduced_filename(file_path)
        # Search next to the file when given a file, else in the directory.
        directory = file_path.parent if file_path.is_file() else file_path
        metadata_files = [
            file for file in directory.iterdir()
            if file.is_file() and is_mm_metadata(file)
        ]
        if not metadata_files:
            raise FileNotFoundError("MMMetadata file not found in directory.")
        matching_files = [file for file in metadata_files if filename in file.name]
        # If no metadata file matches the image name, assume the only
        # metadata file in the folder is the right one.
        chosen_file = matching_files[0] if matching_files else metadata_files[0]
        with open(chosen_file) as metadata_file:
            return json.load(metadata_file)

    def _get_axis_order(self):
        """
        Returns the "AxisOrder" list from the summary metadata without the
        position axis. The list stored in the metadata is left unmodified.
        """
        # Position axis is dropped because images at different x-y stage
        # positions in MM are saved in different files with different
        # metadata files.
        return [
            axis for axis in self.summary_metadata["AxisOrder"]
            if axis != self._POSITION
        ]

    def _get_dimensions(self) -> dict:
        """
        Returns a dict mapping each axis in self.axis_order to its size
        from the "IntendedDimensions" entry of the summary metadata.
        """
        intended_dims = self.summary_metadata["IntendedDimensions"]
        return {axis: int(intended_dims[axis]) for axis in self.axis_order}

    def _get_all_filenames(self):
        """
        Returns the distinct "FileName" entries of all frame keys, in order
        of first appearance.
        """
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(
            self._metadata_dict[key]["FileName"] for key in self.frame_keys
        ))
class MMImageMetadata(object):
    """
    Holds the metadata of a single image in a Micro-Manager acquisition,
    looked up by the image's index into the frame keys of an MMMetadata.

    ## Constructor Parameters:

    mm_metadata: MMMetadata
        metadata of the acquisition the image belongs to.

    image_index: int
        index of the image's frame key in mm_metadata.frame_keys.
    """

    def __init__(self, mm_metadata: MMMetadata, image_index: int):
        self._mm_metadata: MMMetadata = mm_metadata
        self.image_index: int = image_index
        self.framekey: str = mm_metadata.frame_keys[image_index]
        self.image_metadata: dict = mm_metadata._metadata_dict[self.framekey]
        self.coords: dict = self._get_coords()
        frame_meta = self.image_metadata
        self.pixel_size: float = float(frame_meta["PixelSizeUm"])
        self.binning: int = int(frame_meta["Binning"])
        # Width and height are the last two "-"-separated fields of "ROI".
        roi_fields = frame_meta["ROI"].split("-")
        self.image_width: int = int(roi_fields[-2])
        self.image_height: int = int(roi_fields[-1])
        self.x_pos: float = float(frame_meta["XPositionUm"])
        self.y_pos: float = float(frame_meta["YPositionUm"])
        self.z_pos: float = float(frame_meta["ZPositionUm"])

    def get_coords_str(self):
        """
        Returns the image coords as one string, each axis name followed by
        its zero-padded three-digit index and entries joined with "_".
        """
        labels = [f"{axis}{index:03d}" for axis, index in self.coords.items()]
        return "_".join(labels).strip("_")

    def _get_coords(self):
        """
        Returns a dictionary with the image coords, keyed by axis name, for
        every axis whose dimension is not 1. Values are the zero-based
        indices taken from the frame key, ie an image from z-slice 21 taken
        with channel 2 has coords {"z": 20, "channel": 1}.
        """
        dimensions = self._mm_metadata.dims
        framekey_coords = self._get_framekey_coords()
        return {
            axis: framekey_coords[axis]
            for axis, size in dimensions.items()
            if size != 1
        }

    def _get_framekey_coords(self):
        """
        Parses the numbers out of the frame key and returns them as a dict
        keyed by axis name.
        """
        index_fields = [int(field) for field in self.framekey.split("-")[1:]]
        # Order of framekeys is Framekey-(time)-(channel)-(z)
        return {
            MMMetadata._Z: index_fields[2],
            MMMetadata._CHANNEL: index_fields[1],
            MMMetadata._TIME: index_fields[0],
        }
class LSPycroMetadata(object):
    """
    Class to hold metadata from LSPycroApp.

    ## Constructor Parameters:

    path: str or pathlib.Path
        Can be the path of an LS Pycro notes file, or any file or directory
        located in or below an acquisition folder (a folder whose name
        contains "Acquisition"). In the latter case the notes file is
        looked for directly inside the nearest such folder.

    ## Raises:

    FileNotFoundError
        If path is not part of an acquisition folder, or the acquisition
        folder holds no notes file.
    """
    # Name of acquisition folder.
    ACQUISITION = "Acquisition"
    FISH = "Fish"
    REGION = "Region"

    def __init__(self, path: pathlib.Path):
        self.config = ConfigParser()
        self._init_config(path)

    def _init_config(self, path: pathlib.Path):
        """Locates the LS Pycro notes file for path and loads it into self.config."""
        path = pathlib.Path(path)
        if path.is_file() and is_ls_pycro_notes(path):
            self.config.read(path)
            return
        # Walk upward from the starting directory to the nearest folder whose
        # name contains "Acquisition". Using parents (rather than a while
        # loop on .parent) guarantees termination at the filesystem root.
        start_dir = path if path.is_dir() else path.parent
        acquisition_dir = next(
            (directory for directory in (start_dir, *start_dir.parents)
             if self.ACQUISITION in directory.name),
            None,
        )
        if acquisition_dir is None:
            raise FileNotFoundError(
                "LSPycroMetadata file not found. Not ls pycro acquisition.")
        for file in acquisition_dir.iterdir():
            if file.is_file() and is_ls_pycro_notes(file):
                self.config.read(file)
                return
        raise FileNotFoundError("LSPycroMetadata file not found.")

    def get_section_dict(self, section: str) -> dict:
        """
        Returns the options of the given config section as a dict, with
        each value parsed as a Python literal.
        """
        return {
            key: literal_eval(value)
            for key, value in self.config.items(section)
        }

    def get_region_dict(self, fish_num: int, region_num: int) -> dict:
        """Returns the section dict of the given fish and region."""
        section = self._get_region_section(fish_num, region_num)
        return self.get_section_dict(section)

    def get_num_fish(self) -> int:
        """
        Returns the number of distinct fish that have at least one region
        section in the config.
        """
        # Region sections are named "Fish <n> Region <m>", so the text before
        # "Region" identifies the fish. Collecting it in a set keeps a fish
        # with several regions from being counted more than once.
        fish_labels = {
            section.split(self.REGION)[0].strip()
            for section in self.config.sections()
            if self.FISH in section and self.REGION in section
        }
        return len(fish_labels)

    def get_num_regions(self, fish_num: int) -> int:
        """
        Returns the number of consecutively numbered region sections,
        starting at region 1, that exist for the given fish.
        """
        region_num = 1
        while self.config.has_section(
                self._get_region_section(fish_num, region_num)):
            region_num += 1
        return region_num - 1

    def _get_region_section(self, fish_num: int, region_num: int) -> str:
        """Returns the config section name of the given fish and region."""
        return f"{self.FISH} {fish_num} {self.REGION} {region_num}"