From 0b082b5d971135f56609564a18971f21d569bc41 Mon Sep 17 00:00:00 2001 From: pavel_silin Date: Tue, 23 May 2023 18:50:53 +0300 Subject: [PATCH 1/2] HCS parser perform integrity check if asked, before perform actual work --- .../cp-tools/research/hcs-parser/Dockerfile | 6 +++--- .../hcs-parser/parser/src/processors.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/deploy/docker/cp-tools/research/hcs-parser/Dockerfile b/deploy/docker/cp-tools/research/hcs-parser/Dockerfile index 9e7cfcd0df..ab1dbcc632 100644 --- a/deploy/docker/cp-tools/research/hcs-parser/Dockerfile +++ b/deploy/docker/cp-tools/research/hcs-parser/Dockerfile @@ -24,8 +24,8 @@ RUN yum install -y curl \ openssh-server \ wget \ unzip \ - python && \ - python-devel && \ + python \ + python-devel \ gcc && \ curl https://cloud-pipeline-oss-builds.s3.amazonaws.com/tools/pip/2.7/get-pip.py | python - && \ pip install pillow==9.2.0 tifffile==2019.7.26.2 numpy==1.16.6 imagecodecs-lite==2019.12.3 pandas==0.24.2 openpyxl==2.6.4 @@ -51,7 +51,7 @@ RUN mkdir -p "$ANACONDA_HOME" && \ ImageMagick \ java-1.8.0-openjdk-devel && \ pip3 install $CP_PIP_EXTRA_ARGS -I -q generate-tiff-offsets==0.1.7 && \ - pip3 install $CP_PIP_EXTRA_ARGS -I -q awscli + pip3 install $CP_PIP_EXTRA_ARGS -I -q awscli && \ conda deactivate ARG BIOFORMATS_TO_RAW_VERSION=0.4.0 diff --git a/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py b/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py index 3cc87b0737..c53af024dc 100644 --- a/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py +++ b/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py @@ -42,6 +42,8 @@ MEASUREMENT_INDEX_FILE_PATH = '/{}/{}'.format(HCS_IMAGE_DIR_NAME, HCS_INDEX_FILE_NAME) MEASUREMENT_INDEX_FILE_FORCE_COPY_TO_PARSER_DIR = os.getenv('HCS_PARSING_INDEX_FILE_FORCE_COPY_TO_PARSER_DIR', 'false') LOCALIZE_USE_PIPE = os.getenv('HCS_PARSING_LOCALIZE_USE_PIPE', 'false') 
+PERFORM_IMAGE_INTEGRITY_CHECK = os.getenv('HCS_PARSING_PERFORM_IMAGE_INTEGRITY_CHECK', 'true').lower() == "true" + HCS_EVAL_DIR_NAME = os.getenv('HCS_EVAL_DIR_NAME', 'eval') EVAL_PROCESSING_ONLY = get_bool_run_param('HCS_PARSING_EVAL_ONLY') @@ -361,6 +363,8 @@ def process_file(self): return 2 self.create_tmp_stat_file() hcs_index_file_path = self.hcs_root_dir + MEASUREMENT_INDEX_FILE_PATH + if PERFORM_IMAGE_INTEGRITY_CHECK and not self._validate_hcs_files_integrity(hcs_index_file_path): + return 1 time_series_details = self._extract_time_series_details(hcs_index_file_path) self.generate_ome_xml_info_file() xml_info_tree = ET.parse(self.ome_xml_info_file_path).getroot() @@ -952,3 +956,18 @@ def get_channel_dimensions(self, hcs_file_root, wells_grid_mapping): entry.find(hcs_schema_prefix + 'ImageResolutionY').text = \ str(float(resolution_y) * y_scaling).upper() return channel_dimensions + + def _validate_hcs_files_integrity(self, hcs_index_file_path): + hcs_xml_info_tree = ET.parse(hcs_index_file_path).getroot() + hcs_schema_prefix = HcsParsingUtils.extract_xml_schema(hcs_xml_info_tree) + images_list = hcs_xml_info_tree.find(hcs_schema_prefix + 'Images') + for image in images_list.findall(hcs_schema_prefix + 'Image'): + image_path = image.find(hcs_schema_prefix + 'URL').text + fill_image_path = os.path.join(self.hcs_root_dir, HCS_IMAGE_DIR_NAME, image_path) + if not os.path.exists(fill_image_path): + self._processing_logger.log_info( + '{} file contains file with path {} but it cannot be found in path: {}! Failing...' + .format(hcs_index_file_path, image_path, fill_image_path)) + return False + self._processing_logger.log_info('Files integrity check successfully passed. 
All images files exists.') + return True \ No newline at end of file From 3f1a8b1338b2b9b12092e953db2a60ce6cc312f7 Mon Sep 17 00:00:00 2001 From: pavel_silin Date: Thu, 25 May 2023 19:19:00 +0300 Subject: [PATCH 2/2] HCS parser perform conversion well by well instead of whole sequence --- .../hcs-parser/convert_to_ome_tiff.sh | 7 +- .../hcs-parser/parser/src/processors.py | 284 +++++++++++------- 2 files changed, 182 insertions(+), 109 deletions(-) diff --git a/deploy/docker/cp-tools/research/hcs-parser/convert_to_ome_tiff.sh b/deploy/docker/cp-tools/research/hcs-parser/convert_to_ome_tiff.sh index b33f937476..4c00ddee8d 100644 --- a/deploy/docker/cp-tools/research/hcs-parser/convert_to_ome_tiff.sh +++ b/deploy/docker/cp-tools/research/hcs-parser/convert_to_ome_tiff.sh @@ -17,13 +17,14 @@ INDEX_FILE_PATH="$1" IMAGE_PREVIEW_DATA_ROOT="$2" SEQUENCE_ID="$3" -OME_TIFF_IMAGE_NAME="$4" +WELL_ID="$4" +OME_TIFF_IMAGE_NAME="$5" -RAW_IMAGE_DIR=$(mktemp -d --dry-run "$IMAGE_PREVIEW_DATA_ROOT/$SEQUENCE_ID/data_XXXXX.raw/") +RAW_IMAGE_DIR=$(mktemp -d --dry-run "$IMAGE_PREVIEW_DATA_ROOT/$SEQUENCE_ID/$WELL_ID/data_XXXXX.raw/") if [[ -z "$OME_TIFF_IMAGE_NAME" ]]; then OME_TIFF_IMAGE_NAME="${HCS_PARSER_OME_TIFF_FILE_NAME:-data.ome.tiff}" fi -OME_TIFF_IMAGE_PATH="$IMAGE_PREVIEW_DATA_ROOT/$SEQUENCE_ID/$OME_TIFF_IMAGE_NAME" +OME_TIFF_IMAGE_PATH="$IMAGE_PREVIEW_DATA_ROOT/$SEQUENCE_ID/$WELL_ID/$OME_TIFF_IMAGE_NAME" HCS_PROCESSING_TASK="${HCS_PROCESSING_TASK:-HCS processing}" diff --git a/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py b/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py index c53af024dc..66ce25a599 100644 --- a/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py +++ b/deploy/docker/cp-tools/research/hcs-parser/parser/src/processors.py @@ -36,6 +36,7 @@ RAW_TO_OME_TIFF_FLAGS = os.getenv('HCS_PARSING_RAW2OMETIFF_EXTRA_FLAGS') BFORMATS_TO_RAW_FLAGS = os.getenv('HCS_PARSING_BIOFORMATS2RAW_EXTRA_FLAGS') 
HCS_PARSING_OME_TIFF_FILE_NAME = os.getenv('HCS_PARSING_OME_TIFF_FILE_NAME', 'data.ome.tiff') +HCS_PARSING_OVERVIEW_DATA_OME_TIFF = 'overview_data.ome.tiff' PLANE_COORDINATES_DELIMITER = os.getenv('HCS_PARSING_PLANE_COORDINATES_DELIMITER', '_') HCS_INDEX_FILE_NAME = os.getenv('HCS_PARSING_INDEX_FILE_NAME', 'Index.xml') HCS_IMAGE_DIR_NAME = os.getenv('HCS_PARSING_IMAGE_DIR_NAME', 'Images') @@ -149,6 +150,7 @@ def __init__(self, hcs_root_dir, hcs_img_path): self.stat_file_path = HcsParsingUtils.get_stat_file_name(self.hcs_img_path) self.tmp_stat_file_path = HcsParsingUtils.get_stat_active_file_name(self.hcs_img_path) self.tmp_local_dir = HcsParsingUtils.generate_local_service_directory(self.hcs_img_path) + self.local_preview_dir = os.path.join(self.tmp_local_dir, 'preview') self.parsing_start_time = None self._processing_logger = HcsFileLogger(self.hcs_root_dir) @@ -196,7 +198,7 @@ def extract_plate_from_ome_xml(ome_xml_info_root): return ome_plate @staticmethod - def calculate_wells_padding_for_ome(hcs_xml_info_root, ome_xml_info_root): + def calculate_well_padding_for_ome(hcs_xml_info_root, ome_xml_info_root): wells_x_padding_hcs, \ wells_y_padding_hcs = HcsFileParser.extract_first_well_coordinates_hcs_xml(hcs_xml_info_root) wells_x_padding_ome, \ @@ -305,17 +307,13 @@ def _write_hcs_file(self, time_series_details, plate_width, plate_height, commen hcs_file_root_dir = os.path.dirname(self.hcs_img_path) source_dir = os.path.relpath(self.hcs_root_dir, hcs_file_root_dir) preview_dir = os.path.relpath(self.hcs_img_service_dir, hcs_file_root_dir) - ome_data_file_name = HCS_PARSING_OME_TIFF_FILE_NAME - ome_offsets_file_name = ome_data_file_name[:ome_data_file_name.find('.')] + '.offsets.json' details = { 'sourceDir': source_dir, 'previewDir': preview_dir, 'time_series_details': time_series_details, 'plate_height': plate_height, 'plate_width': plate_width, - 'comment': comment, - 'ome_data_file_name': ome_data_file_name, - 'ome_offsets_file_name': ome_offsets_file_name + 
'comment': comment } self._processing_logger.log_info('Saving preview info [source={}; preview={}] to [{}]' .format(self.hcs_root_dir, self.hcs_img_service_dir, self.hcs_img_path)) @@ -344,6 +342,17 @@ def _localize_related_files(self): local_tmp_dir_trailing)) return localization_result == 0 + def _move_results_to_cloud(self): + if LOCALIZE_USE_PIPE == "true": + cloud_transfer_result = os.system('pipe storage cp -f -r "{}" "{}"' + .format(self.local_preview_dir, + HcsParsingUtils.extract_cloud_path(self.hcs_img_service_dir))) + else: + cloud_transfer_result = os.system('aws s3 sync "{}" "{}"' + .format(self.local_preview_dir, + HcsParsingUtils.extract_cloud_path(self.hcs_img_service_dir))) + return cloud_transfer_result == 0 + def process_file(self): """Process the specified HCS file @@ -361,70 +370,97 @@ def process_file(self): and not HcsParsingUtils.active_processing_exceed_timeout(self.tmp_stat_file_path): self._processing_logger.log_info('This file is processed by another parser, skipping...') return 2 + self.create_tmp_stat_file() hcs_index_file_path = self.hcs_root_dir + MEASUREMENT_INDEX_FILE_PATH if PERFORM_IMAGE_INTEGRITY_CHECK and not self._validate_hcs_files_integrity(hcs_index_file_path): return 1 - time_series_details = self._extract_time_series_details(hcs_index_file_path) + self.generate_ome_xml_info_file() xml_info_tree = ET.parse(self.ome_xml_info_file_path).getroot() plate_width, plate_height = self._get_plate_configuration(xml_info_tree) wells_tags = self.read_wells_tags() if wells_tags: self._processing_logger.log_info("Tags " + str(wells_tags)) + + time_series_details = self._extract_time_series_details(hcs_index_file_path) if not TAGS_PROCESSING_ONLY and not EVAL_PROCESSING_ONLY: if not self._localize_related_files(): self._processing_logger.log_info('Some errors occurred during copying files from the bucket, exiting...') return 1 - else: - self._processing_logger.log_info('Localization is finished.') - local_preview_dir = 
os.path.join(self.tmp_local_dir, 'preview') - hcs_local_index_file_path = get_path_without_trailing_delimiter(self.tmp_local_dir) \ - + MEASUREMENT_INDEX_FILE_PATH - for sequence_id, timepoints in time_series_details.items(): - self._processing_logger.log_info('Processing sequence with id={}'.format(sequence_id)) - sequence_index_file_path = self.extract_sequence_data(sequence_id, hcs_local_index_file_path) - conversion_result = os.system('bash "{}" "{}" "{}" {}'.format( - OME_TIFF_SEQUENCE_CREATION_SCRIPT, sequence_index_file_path, local_preview_dir, sequence_id)) - if conversion_result != 0: - self._processing_logger.log_info('File processing was not successful...') - return 1 - sequence_overview_index_file_path, wells_grid_mapping = self.build_sequence_overview_index(sequence_index_file_path) - conversion_result = os.system('bash "{}" "{}" "{}" {} "{}"'.format( - OME_TIFF_SEQUENCE_CREATION_SCRIPT, sequence_overview_index_file_path, local_preview_dir, - sequence_id, 'overview_data.ome.tiff')) - if conversion_result != 0: - self._processing_logger.log_info('File processing was not successful: well preview generation failure') - return 1 - self.write_dict_to_file(os.path.join(local_preview_dir, sequence_id, 'wells_map.json'), - self.build_wells_map(sequence_id, wells_grid_mapping, wells_tags)) - if LOCALIZE_USE_PIPE == "true": - cloud_transfer_result = os.system('pipe storage cp -f -r "{}" "{}"' - .format(local_preview_dir, - HcsParsingUtils.extract_cloud_path(self.hcs_img_service_dir))) - else: - cloud_transfer_result = os.system('aws s3 sync "{}" "{}"' - .format(local_preview_dir, - HcsParsingUtils.extract_cloud_path(self.hcs_img_service_dir))) - if cloud_transfer_result != 0: + self._processing_logger.log_info('Localization is finished.') + + for sequence_id, _ in time_series_details.items(): + wells_mapping = self._perform_sequence_processing(sequence_id, wells_tags) + self.write_dict_to_file( + os.path.join(self.local_preview_dir, sequence_id, 
'wells_map.json'), + HcsFileParser.ordered_by_coords(wells_mapping) + ) + + if not self._move_results_to_cloud(): self._processing_logger.log_info('Results transfer was not successful...') return 1 + self._write_hcs_file(time_series_details, plate_width, plate_height) + if not EVAL_PROCESSING_ONLY: tags_processing_result = self.try_process_tags(xml_info_tree, wells_tags) if TAGS_PROCESSING_ONLY: if wells_tags: - for sequence_id, timepoints in time_series_details.items(): + for sequence_id, _ in time_series_details.items(): path = os.path.join(self.hcs_img_service_dir, sequence_id, 'wells_map.json') self.write_dict_to_file(path, self.update_wells_json(path, wells_tags)) return tags_processing_result + if not TAGS_PROCESSING_ONLY: eval_processing_result = self.try_process_eval() if EVAL_PROCESSING_ONLY: return eval_processing_result + self.create_stat_file() return 0 + def _perform_sequence_processing(self, sequence_id, wells_tags): + hcs_local_index_file_path = get_path_without_trailing_delimiter(self.tmp_local_dir) \ + + MEASUREMENT_INDEX_FILE_PATH + self._processing_logger.log_info('Processing sequence with id={}'.format(sequence_id)) + sequence_index_file_path = self.extract_sequence_data(sequence_id, hcs_local_index_file_path) + + wells_mapping = dict() + for well_id in self._extract_well_ids(sequence_index_file_path): + exit_code, well_key, well_details = self._perform_well_processing( + sequence_id, well_id, sequence_index_file_path, hcs_local_index_file_path, wells_tags + ) + if exit_code != 0: + return exit_code + wells_mapping[well_key] = well_details + return wells_mapping + + def _perform_well_processing(self, sequence_id, well_id, sequence_index_file_path, + hcs_local_index_file_path, wells_tags): + self._processing_logger.log_info('Processing well with id={}'.format(well_id)) + well_index_file_path = self.extract_well_data(well_id, sequence_index_file_path, + os.path.dirname(hcs_local_index_file_path)) + conversion_result = os.system('bash "{}" "{}" 
"{}" {} "{}"'.format( + OME_TIFF_SEQUENCE_CREATION_SCRIPT, well_index_file_path, + self.local_preview_dir, sequence_id, well_id) + ) + if conversion_result != 0: + self._processing_logger.log_info('File processing was not successful...') + return 1, None, None + + well_overview_index_file_path, wells_grid_mapping = self.build_well_overview_index(well_index_file_path) + conversion_result = os.system('bash "{}" "{}" "{}" {} "{}" "{}"'.format( + OME_TIFF_SEQUENCE_CREATION_SCRIPT, well_overview_index_file_path, self.local_preview_dir, + sequence_id, well_id, HCS_PARSING_OVERVIEW_DATA_OME_TIFF) + ) + if conversion_result != 0: + self._processing_logger.log_info('File processing was not successful: well preview generation failure') + return 1, None, None + + well_key, well_details = self.build_well_map(sequence_id, well_id, wells_grid_mapping, wells_tags) + return 0, well_key, well_details + def update_wells_json(self, path, wells_tags): self._processing_logger.log_info('Updating well tags for %s' % path) with open(path, 'r') as well_json: @@ -442,28 +478,14 @@ def extract_sequence_data(self, target_sequence_id, hcs_local_index_file_path): images_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Images') sequence_data_local_dir = os.path.join(self.tmp_local_dir, target_sequence_id) self._mkdir(sequence_data_local_dir) - src_images_dir = os.path.dirname(hcs_local_index_file_path) sequence_image_ids = set() images = images_list.findall(hcs_schema_prefix + 'Image') - image_subfolders = set() - for image in images: - file_name = image.find(hcs_schema_prefix + 'URL').text - last_delim_index = file_name.rfind(PATH_DELIMITER) - if last_delim_index > 0: - image_subfolders.add(file_name[:last_delim_index]) - for path in image_subfolders: - self._mkdir(os.path.join(sequence_data_local_dir, path)) - self._mkdir(os.path.join(sequence_data_local_dir, OVERVIEW_DIR_NAME, path)) for image in images: sequence_id = image.find(hcs_schema_prefix + 'SequenceID').text if sequence_id != 
target_sequence_id: images_list.remove(image) else: sequence_image_ids.add(image.find(hcs_schema_prefix + 'id').text) - file_name = image.find(hcs_schema_prefix + 'URL').text - src_file_path = os.path.join(src_images_dir, file_name) - dest_file_path = os.path.join(sequence_data_local_dir, file_name) - shutil.move(src_file_path, dest_file_path) sequence_wells = set() wells_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Wells') for well in wells_list.findall(hcs_schema_prefix + 'Well'): @@ -486,47 +508,43 @@ def extract_sequence_data(self, target_sequence_id, hcs_local_index_file_path): hcs_xml_info_tree.write(sequence_index_file_path) return sequence_index_file_path - def build_wells_map(self, sequence_id, wells_grid_mapping, wells_tags): - hcs_index_file_path = os.path.join(self.tmp_local_dir, sequence_id, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) + def build_well_map(self, sequence_id, well_id, wells_grid_mapping, wells_tags): + hcs_index_file_path = os.path.join(self.tmp_local_dir, sequence_id, well_id, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) ome_xml_file_path = os.path.join(os.path.dirname(hcs_index_file_path), 'Index.ome.xml') self.generate_bioformats_ome_xml(hcs_index_file_path, ome_xml_file_path) - preview_hcs_index_file_path = os.path.join(self.tmp_local_dir, sequence_id, OVERVIEW_DIR_NAME, + preview_hcs_index_file_path = os.path.join(self.tmp_local_dir, sequence_id, well_id, OVERVIEW_DIR_NAME, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) preview_ome_xml_file_path = os.path.join(os.path.dirname(preview_hcs_index_file_path), 'Index.ome.xml') self.generate_bioformats_ome_xml(preview_hcs_index_file_path, preview_ome_xml_file_path) hcs_xml_info_root = ET.parse(hcs_index_file_path).getroot() ome_xml_info_root = ET.parse(ome_xml_file_path).getroot() - wells_x_padding, wells_y_padding = self.calculate_wells_padding_for_ome(hcs_xml_info_root, ome_xml_info_root) + well_x_padding, well_y_padding = self.calculate_well_padding_for_ome(hcs_xml_info_root, ome_xml_info_root) ome_plate = 
self.extract_plate_from_ome_xml(ome_xml_info_root) ome_schema_prefix = HcsParsingUtils.extract_xml_schema(ome_xml_info_root) - measured_wells = self.find_measured_wells(ome_plate, ome_schema_prefix, wells_x_padding, wells_y_padding) + well_key, well_fields = self.define_measured_well(ome_plate, ome_schema_prefix, well_x_padding, well_y_padding) - wells_mapping = dict() is_well_round, well_size = self.extract_well_configuration(hcs_xml_info_root) self._processing_logger.log_info('Extracted the following plate configuration: round [%s], size [%f]' % ('true' if is_well_round else 'false', well_size)) - for well_key, fields_list in measured_wells.items(): - chunks = well_key.split(PLANE_COORDINATES_DELIMITER) - well_tuple = (chunks[0], chunks[1]) - well_tags = wells_tags.get(well_tuple, {}) - wells_mapping[well_key] = self.build_well_details(fields_list, well_size, is_well_round, - wells_grid_mapping[well_tuple], well_tags) + chunks = well_key.split(PLANE_COORDINATES_DELIMITER) + well_coord_tuple = (chunks[0], chunks[1]) + well_tags = wells_tags.get(well_coord_tuple, {}) + well_details = self.build_well_details(well_id, well_fields, well_size, is_well_round, + wells_grid_mapping[well_coord_tuple], well_tags) preview_ome_xml_info_root = ET.parse(preview_ome_xml_file_path).getroot() preview_ome_plate = self.extract_plate_from_ome_xml(preview_ome_xml_info_root) - for well in preview_ome_plate.findall(ome_schema_prefix + 'Well'): - well_x_coord = int(well.get('Column')) + wells_x_padding - well_y_coord = int(well.get('Row')) + wells_y_padding - coords_key = self.build_cartesian_coords_key(well_x_coord, well_y_coord) - if coords_key in wells_mapping: - well_sample = well.find(ome_schema_prefix + 'WellSample') - well_image_id = well_sample.find(ome_schema_prefix + 'ImageRef').get('ID') - well_details = wells_mapping[coords_key] - well_details['well_overview'] = well_image_id - wells_mapping[coords_key] = well_details - return HcsFileParser.ordered_by_coords(wells_mapping) - 
- def build_well_details(self, fields_list, well_size, is_well_round, well, well_tags): + preview_ome_well = preview_ome_plate.find(ome_schema_prefix + 'Well') + well_sample = preview_ome_well.find(ome_schema_prefix + 'WellSample') + well_image_id = well_sample.find(ome_schema_prefix + 'ImageRef').get('ID') + well_details['well_overview'] = well_image_id + return well_key, well_details + + def build_well_details(self, well_id, fields_list, well_size, is_well_round, well, well_tags): + + def __get_path_to_well_file(file_name): + return os.path.join(well_id, file_name) + x_coords = set() y_coords = set() for field in fields_list: @@ -559,7 +577,13 @@ def build_well_details(self, fields_list, well_size, is_well_round, well, well_t field_y_coord = y_coords.index(field.y) + 1 + y_coord_padding to_ome_mapping[self.build_cartesian_coords_key(field_x_coord, field_y_coord)] = field.ome_image_id coordinates[field.ome_image_id] = (field.x, field.y) + ome_tiff_offsets_file_name = HCS_PARSING_OME_TIFF_FILE_NAME[:HCS_PARSING_OME_TIFF_FILE_NAME.find('.')] + '.offsets.json' + overview_ome_tiff_file_name = HCS_PARSING_OVERVIEW_DATA_OME_TIFF[:HCS_PARSING_OVERVIEW_DATA_OME_TIFF.find('.')] + '.offsets.json' well_details = { + 'path': __get_path_to_well_file(HCS_PARSING_OME_TIFF_FILE_NAME), + 'offsets_path': __get_path_to_well_file(ome_tiff_offsets_file_name), + 'overview_path': __get_path_to_well_file(HCS_PARSING_OVERVIEW_DATA_OME_TIFF), + 'overview_offsets_path': __get_path_to_well_file(overview_ome_tiff_file_name), 'width': well_view_width, 'height': well_view_height, 'round_radius': round(well_viewer_radius, 2) if is_well_round else None, @@ -570,22 +594,19 @@ def build_well_details(self, fields_list, well_size, is_well_round, well, well_t } return well_details - def find_measured_wells(self, ome_plate, ome_schema_prefix, wells_x_padding, wells_y_padding): - measured_wells = {} - for well in ome_plate.findall(ome_schema_prefix + 'Well'): - well_x_coord = int(well.get('Column')) + 
wells_x_padding - well_y_coord = int(well.get('Row')) + wells_y_padding - well_fields = set() - for field in well.findall(ome_schema_prefix + 'WellSample'): - field_x_coord = field.get('PositionX') - field_y_coord = field.get('PositionY') - if field_x_coord is not None and field_y_coord is not None: - ome_image_id = field.find(ome_schema_prefix + 'ImageRef').get('ID') - well_fields.add( - FieldDetails(well_x_coord, well_y_coord, ome_image_id, field_x_coord, field_y_coord)) - if len(well_fields) > 0: - measured_wells[self.build_cartesian_coords_key(well_x_coord, well_y_coord)] = well_fields - return measured_wells + def define_measured_well(self, ome_plate, ome_schema_prefix, wells_x_padding, wells_y_padding): + well = ome_plate.find(ome_schema_prefix + 'Well') + well_x_coord = int(well.get('Column')) + wells_x_padding + well_y_coord = int(well.get('Row')) + wells_y_padding + well_fields = set() + for field in well.findall(ome_schema_prefix + 'WellSample'): + field_x_coord = field.get('PositionX') + field_y_coord = field.get('PositionY') + if field_x_coord is not None and field_y_coord is not None: + ome_image_id = field.find(ome_schema_prefix + 'ImageRef').get('ID') + well_fields.add( + FieldDetails(well_x_coord, well_y_coord, ome_image_id, field_x_coord, field_y_coord)) + return self.build_cartesian_coords_key(well_x_coord, well_y_coord), well_fields def extract_well_configuration(self, hcs_xml_info_root): root_xml_file = '-'.join(os.path.basename(self.hcs_root_dir).split('-')[:-1]) + '.xml' @@ -662,30 +683,30 @@ def try_process_tags(self, xml_info_tree, wells_tags): tags_processing_result = 1 return tags_processing_result - def build_sequence_overview_index(self, sequence_index_file_path): - hcs_xml_info_tree = ET.parse(sequence_index_file_path) + def build_well_overview_index(self, well_index_file_path): + hcs_xml_info_tree = ET.parse(well_index_file_path) hcs_xml_info_root = hcs_xml_info_tree.getroot() - sequence_data_root_path = 
os.path.dirname(sequence_index_file_path) - sequence_preview_dir_path = os.path.join(sequence_data_root_path, OVERVIEW_DIR_NAME) - self._mkdir(sequence_preview_dir_path) + well_data_root_path = os.path.dirname(well_index_file_path) + well_preview_dir_path = os.path.join(well_data_root_path, OVERVIEW_DIR_NAME) + self._mkdir(well_preview_dir_path) hcs_schema_prefix = HcsParsingUtils.extract_xml_schema(hcs_xml_info_root) original_images_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Images') hcs_xml_info_root.remove(original_images_list) wells_grid_mapping = self.get_wells_grid_mapping(hcs_schema_prefix, original_images_list) channel_dimensions = self.get_channel_dimensions(hcs_xml_info_root, wells_grid_mapping) self._processing_logger.log_info('Scaling overview TIFF files...') - well_layers = self.build_well_layers(original_images_list, sequence_data_root_path, - channel_dimensions, sequence_preview_dir_path, wells_grid_mapping) + well_layers = self.build_well_layers(original_images_list, well_data_root_path, + channel_dimensions, well_preview_dir_path, wells_grid_mapping) wells_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Wells') self._processing_logger.log_info('Merging overview TIFF files...') for well in wells_list.findall(hcs_schema_prefix + 'Well'): - self.merge_well_layers(original_images_list, sequence_preview_dir_path, well, well_layers, + self.merge_well_layers(original_images_list, well_preview_dir_path, well, well_layers, wells_grid_mapping, channel_dimensions) hcs_xml_info_root.append(original_images_list) - preview_sequence_index_file_path = os.path.join(sequence_preview_dir_path, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) + preview_well_index_file_path = os.path.join(well_preview_dir_path, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) ET.register_namespace('', hcs_schema_prefix[1:-1]) - hcs_xml_info_tree.write(preview_sequence_index_file_path) - return preview_sequence_index_file_path, wells_grid_mapping + hcs_xml_info_tree.write(preview_well_index_file_path) 
+ return preview_well_index_file_path, wells_grid_mapping def merge_well_layers(self, original_images_list, sequence_preview_dir_path, well, well_layers, wells_grid_mapping, channel_dimensions): @@ -970,4 +991,55 @@ def _validate_hcs_files_integrity(self, hcs_index_file_path): .format(hcs_index_file_path, image_path, fill_image_path)) return False self._processing_logger.log_info('Files integrity check successfully passed. All images files exists.') - return True \ No newline at end of file + return True + + def _extract_well_ids(self, sequence_index_file_path): + hcs_xml_info_tree = ET.parse(sequence_index_file_path).getroot() + hcs_schema_prefix = HcsParsingUtils.extract_xml_schema(hcs_xml_info_tree) + wells_list = hcs_xml_info_tree.find(hcs_schema_prefix + 'Wells') + return [well.find(hcs_schema_prefix + 'id').text for well in wells_list.findall(hcs_schema_prefix + 'Well')] + + def extract_well_data(self, well_id, sequence_index_file_path, src_images_dir): + well_local_dir = os.path.join(os.path.dirname(sequence_index_file_path), well_id) + hcs_xml_info_tree = ET.parse(sequence_index_file_path) + hcs_xml_info_root = hcs_xml_info_tree.getroot() + hcs_schema_prefix = HcsParsingUtils.extract_xml_schema(hcs_xml_info_root) + + # Remove all wells from Plate entry except selected one + plates_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Plates') + for plate in plates_list.findall(hcs_schema_prefix + 'Plate'): + for well in plate.findall(hcs_schema_prefix + 'Well'): + if well.get('id') != well_id: + plate.remove(well) + + # Remove all wells list of Wells except selected one and save image ids to use it later + well_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Wells') + images_from_well = set() + for well in well_list.findall(hcs_schema_prefix + 'Well'): + if well.find(hcs_schema_prefix + 'id').text != well_id: + well_list.remove(well) + else: + for image in well.findall(hcs_schema_prefix + 'Image'): + images_from_well.add(image.get("id")) + + # Remove all 
images that isn't related to selected well + image_list = hcs_xml_info_root.find(hcs_schema_prefix + 'Images') + for image in image_list.findall(hcs_schema_prefix + 'Image'): + if image.find(hcs_schema_prefix + 'id').text not in images_from_well: + image_list.remove(image) + else: + file_name = image.find(hcs_schema_prefix + 'URL').text + last_delim_index = file_name.rfind(PATH_DELIMITER) + if last_delim_index > 0: + image_subfolder= file_name[:last_delim_index] + self._mkdir(os.path.join(well_local_dir, image_subfolder)) + self._mkdir(os.path.join(well_local_dir, OVERVIEW_DIR_NAME, image_subfolder)) + file_name = image.find(hcs_schema_prefix + 'URL').text + src_file_path = os.path.join(src_images_dir, file_name) + dest_file_path = os.path.join(well_local_dir, file_name) + shutil.move(src_file_path, dest_file_path) + + well_index_file_path = os.path.join(well_local_dir, HCS_OME_COMPATIBLE_INDEX_FILE_NAME) + ET.register_namespace('', hcs_schema_prefix[1:-1]) + hcs_xml_info_tree.write(well_index_file_path) + return well_index_file_path