From 574aa2d46dc139957639aa3bfafe9588c0260070 Mon Sep 17 00:00:00 2001 From: Tim Dudgeon Date: Tue, 15 Aug 2023 10:24:57 +0100 Subject: [PATCH] reference structures, hack for xtalforms and assemblies location --- xchemalign/aligner.py | 28 ++++++++++++++++------------ xchemalign/collator.py | 18 +++++++++++++----- xchemalign/utils.py | 2 ++ 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/xchemalign/aligner.py b/xchemalign/aligner.py index 3531d5f..b2dfef6 100644 --- a/xchemalign/aligner.py +++ b/xchemalign/aligner.py @@ -11,7 +11,6 @@ # limitations under the License. import argparse -import json import os from pathlib import Path @@ -173,7 +172,7 @@ def __init__(self, version_dir, metadata, xtalforms, assemblies, logger=None): if assemblies: self.assemblies_file = assemblies else: - self.assemblies_file = self.base_dir / Constants.ASSEMBLIES_FILENAME + self.assemblies_file = self.base_dir / Constants.ASSEMBLIES_FILENAME # e.g. path/to/assemblies.yaml if logger: self.logger = logger else: @@ -210,9 +209,10 @@ def run(self): input_meta = utils.read_config_file(str(self.metadata_file)) - if not self.aligned_dir.is_dir(): - self.aligned_dir.mkdir() - self.logger.info("created aligned directory", self.aligned_dir) + # FIXME + # if not self.aligned_dir.is_dir(): + # self.aligned_dir.mkdir() + # self.logger.info("created aligned directory", self.aligned_dir) new_meta = self._perform_alignments(input_meta) @@ -224,7 +224,6 @@ def _write_output(self, collator_dict, aligner_dict): yaml.dump(aligner_dict, stream, sort_keys=False, default_flow_style=None) collator_dict[Constants.META_XTALFORMS] = aligner_dict[Constants.META_XTALFORMS] - # collator_dict[Constants.META_ASSEMBLIES] = aligner_dict[Constants.META_ASSEMBLIES] collator_dict[Constants.META_CONFORMER_SITES] = aligner_dict[Constants.META_CONFORMER_SITES] collator_dict[Constants.META_CANONICAL_SITES] = aligner_dict[Constants.META_CANONICAL_SITES] collator_dict[Constants.META_XTALFORM_SITES] = aligner_dict[Constants.META_XTALFORM_SITES] @@ -267,6 +266,9 @@ def _perform_alignments(self, meta): # Load the fs model for the new output dir fs_model = dt.FSModel.from_dir(output_path) + # hack to set the right paths + fs_model.xtalforms = self.xtalforms_file + fs_model.assemblies = self.assemblies_file if source_fs_model: fs_model.alignments = source_fs_model.alignments fs_model.reference_alignments = source_fs_model.reference_alignments @@ -289,17 +291,19 @@ def _perform_alignments(self, meta): # Get assemblies if source_fs_model: - assemblies: dict[str, dt.Assembly] = _load_assemblies( - source_fs_model.assemblies, Path(self.assemblies_file) - ) + self.logger.info('1 reading assemblies from', source_fs_model.assemblies, self.assemblies_file) + assemblies: dict[str, dt.Assembly] = _load_assemblies(source_fs_model.assemblies, self.assemblies_file) else: - assemblies = _load_assemblies(fs_model.assemblies, Path(self.assemblies_file)) + self.logger.info('2 reading assemblies from', fs_model.assemblies, self.assemblies_file) + assemblies = _load_assemblies(fs_model.assemblies, self.assemblies_file) # Get xtalforms if source_fs_model: - xtalforms: dict[str, dt.XtalForm] = _load_xtalforms(source_fs_model.xtalforms, Path(self.xtalforms_file)) + self.logger.info('1 reading xtalforms from', source_fs_model.xtalforms, self.xtalforms_file) + xtalforms: dict[str, dt.XtalForm] = _load_xtalforms(source_fs_model.xtalforms, self.xtalforms_file) else: - xtalforms = _load_xtalforms(fs_model.xtalforms, Path(self.xtalforms_file)) + self.logger.info('2 reading xtalforms from', fs_model.xtalforms, self.xtalforms_file) + xtalforms = _load_xtalforms(fs_model.xtalforms, self.xtalforms_file) # Get the dataset assignments if source_fs_model: diff --git a/xchemalign/collator.py b/xchemalign/collator.py index 7b99ec4..f6b3303 100644 --- a/xchemalign/collator.py +++ b/xchemalign/collator.py @@ -56,6 +56,7 @@ def __init__( type: str, soakdb_file_path, panddas_event_file_paths: list[Path], + reference=False, logger=None, ): self.base_path = base_path @@ -65,6 +66,7 @@ def __init__( self.panddas_event_file_paths = panddas_event_file_paths self.errors = [] self.warnings = [] + self.reference = reference if logger: self.logger = logger else: @@ -273,6 +275,7 @@ def _validate_soakdb_input(self, input, crystals): num_pdb_files = 0 num_mtz_files = 0 num_cif_files = 0 + ref_datasets = set(self.config.get(Constants.CONFIG_REF_DATASETS, [])) for index, row in df.iterrows(): count += 1 @@ -354,7 +357,10 @@ def _validate_soakdb_input(self, input, crystals): if xtal_name in crystals.keys(): self._log_warning("Crystal {} already exists, it's data will be overriden".format(xtal_name)) - data = {Constants.CONFIG_TYPE: Constants.CONFIG_TYPE_MODEL_BUILDING} + data = {} + if xtal_name in ref_datasets: + data[Constants.META_REFERENCE] = True + data[Constants.CONFIG_TYPE] = Constants.CONFIG_TYPE_MODEL_BUILDING self.logger.info("adding crystal (model_building)", xtal_name) crystals[xtal_name] = data last_updated_date = row[Constants.SOAKDB_COL_LAST_UPDATED] @@ -395,6 +401,7 @@ def _validate_soakdb_input(self, input, crystals): def _validate_manual_input(self, input, crystals): num_pdb_files = 0 num_mtz_files = 0 + ref_datasets = set(self.config.get(Constants.CONFIG_REF_DATASETS, [])) for child in (self.base_path / input.input_dir_path).iterdir(): pdb = None mtz = None @@ -421,10 +428,11 @@ def _validate_manual_input(self, input, crystals): Constants.META_SHA256: digest, } num_mtz_files += 1 - crystals[child.name] = { - Constants.CONFIG_TYPE: Constants.CONFIG_TYPE_MANUAL, - Constants.META_XTAL_FILES: data, - } + crystals[child.name] = {} + if child.name in ref_datasets: + crystals[child.name][Constants.META_REFERENCE] = True + crystals[child.name][Constants.CONFIG_TYPE] = Constants.CONFIG_TYPE_MANUAL + crystals[child.name][Constants.META_XTAL_FILES] = data if num_mtz_files < num_pdb_files: self.logger.warn( diff --git a/xchemalign/utils.py b/xchemalign/utils.py index 0edc8bc..ccf3ad5 100644 --- a/xchemalign/utils.py +++ b/xchemalign/utils.py @@ -54,6 +54,7 @@ class Constants: CONFIG_BASE_DIR = "base_dir" CONFIG_OUTPUT_DIR = "output_dir" CONFIG_TARGET_NAME = "target_name" + CONFIG_REF_DATASETS = "ref_datasets" META_RUN_ON = "run_on" META_INPUT_DIRS = "input_dirs" META_VERSION_NUM = "version_number" @@ -67,6 +68,7 @@ class Constants: META_STATUS_DEPRECATED = "deprecated" META_REASON = "reason" META_XTALS = "crystals" + META_REFERENCE = "reference" META_FILE = "file" META_SHA256 = "sha256" META_XTAL_FILES = "crystallographic_files"